diff a/src/hotspot/share/code/debugInfo.cpp b/src/hotspot/share/code/debugInfo.cpp --- a/src/hotspot/share/code/debugInfo.cpp +++ b/src/hotspot/share/code/debugInfo.cpp @@ -62,19 +62,33 @@ } assert(oopDesc::is_oop_or_null(o), "oop only"); return o; } -ScopeValue* DebugInfoReadStream::read_object_value(bool is_auto_box) { +enum { LOCATION_CODE = 0, CONSTANT_INT_CODE = 1, CONSTANT_OOP_CODE = 2, + CONSTANT_LONG_CODE = 3, CONSTANT_DOUBLE_CODE = 4, + OBJECT_CODE = 5, OBJECT_ID_CODE = 6, + AUTO_BOX_OBJECT_CODE = 7, MARKER_CODE = 8, + STACK_OBJECT_CODE = 9 }; + +ScopeValue* DebugInfoReadStream::read_object_value(int type) { int id = read_int(); #ifdef ASSERT assert(_obj_pool != NULL, "object pool does not exist"); for (int i = _obj_pool->length() - 1; i >= 0; i--) { assert(_obj_pool->at(i)->as_ObjectValue()->id() != id, "should not be read twice"); } #endif - ObjectValue* result = is_auto_box ? new AutoBoxObjectValue(id) : new ObjectValue(id); + ObjectValue* result; + if (type == AUTO_BOX_OBJECT_CODE) { + result = new AutoBoxObjectValue(id); + } else if (type == STACK_OBJECT_CODE) { + result = new StackObjectValue(id); + } else { + assert(type == OBJECT_CODE, "has to be an object"); + result = new ObjectValue(id); + } // Cache the object since an object field could reference it. _obj_pool->push(result); result->read_object(this); return result; } @@ -92,25 +106,21 @@ return NULL; } // Serializing scope values -enum { LOCATION_CODE = 0, CONSTANT_INT_CODE = 1, CONSTANT_OOP_CODE = 2, - CONSTANT_LONG_CODE = 3, CONSTANT_DOUBLE_CODE = 4, - OBJECT_CODE = 5, OBJECT_ID_CODE = 6, - AUTO_BOX_OBJECT_CODE = 7, MARKER_CODE = 8 }; - ScopeValue* ScopeValue::read_from(DebugInfoReadStream* stream) { ScopeValue* result = NULL; switch(stream->read_int()) { case LOCATION_CODE: result = new LocationValue(stream); break; case CONSTANT_INT_CODE: result = new ConstantIntValue(stream); break; case CONSTANT_OOP_CODE: result = new ConstantOopReadValue(stream); break; case CONSTANT_LONG_CODE: result = new ConstantLongValue(stream); break; case CONSTANT_DOUBLE_CODE: result = new ConstantDoubleValue(stream); break; - case OBJECT_CODE: result = stream->read_object_value(false /*is_auto_box*/); break; - case AUTO_BOX_OBJECT_CODE: result = stream->read_object_value(true /*is_auto_box*/); break; + case OBJECT_CODE: result = stream->read_object_value(OBJECT_CODE); break; + case AUTO_BOX_OBJECT_CODE: result = stream->read_object_value(AUTO_BOX_OBJECT_CODE); break; + case STACK_OBJECT_CODE: result = stream->read_object_value(STACK_OBJECT_CODE); break; case OBJECT_ID_CODE: result = stream->get_cached_object(); break; case MARKER_CODE: result = new MarkerValue(); break; default: ShouldNotReachHere(); } return result; @@ -188,10 +198,44 @@ _field_values.at(i)->print_on(st); } #endif } +// StackObjectValue + +StackObjectValue::StackObjectValue(int id, ScopeValue* klass, Location location, ConstantIntValue *field_length) +: ObjectValue(id, klass) +, _location(location) +, _field_length(field_length) +{ +} + +void StackObjectValue::read_object(DebugInfoReadStream* stream) { + ObjectValue::read_object(stream); + _location = Location(stream); + _field_length = (ConstantIntValue *)read_from(stream); +} + +void StackObjectValue::write_on(DebugInfoWriteStream* stream) { + if (_visited) { + stream->write_int(OBJECT_ID_CODE); + stream->write_int(_id); + } else { + _visited = true; + stream->write_int(STACK_OBJECT_CODE); + stream->write_int(_id); + _klass->write_on(stream); + int length = _field_values.length(); + stream->write_int(length); + for 
(int i = 0; i < length; i++) { + _field_values.at(i)->write_on(stream); + } + _location.write_on(stream); + _field_length->write_on(stream); + } +} + // ConstantIntValue ConstantIntValue::ConstantIntValue(DebugInfoReadStream* stream) { _value = stream->read_signed_int(); } diff a/src/hotspot/share/code/debugInfo.hpp b/src/hotspot/share/code/debugInfo.hpp --- a/src/hotspot/share/code/debugInfo.hpp +++ b/src/hotspot/share/code/debugInfo.hpp @@ -143,12 +143,14 @@ bool is_visited() const { return _visited; } void set_value(oop value); void set_visited(bool visited) { _visited = false; } + virtual bool is_stack_object() { return false; } + // Serialization of debugging information - void read_object(DebugInfoReadStream* stream); + virtual void read_object(DebugInfoReadStream* stream); void write_on(DebugInfoWriteStream* stream); // Printing void print_on(outputStream* st) const; void print_fields_on(outputStream* st) const; @@ -183,10 +185,29 @@ // Printing void print_on(outputStream* st) const; }; +class StackObjectValue: public ObjectValue { +private: + Location _location; + ConstantIntValue *_field_length; +public: + StackObjectValue(int id, ScopeValue* klass, Location location, ConstantIntValue *field_length); + StackObjectValue(int id) : ObjectValue(id), _location(), _field_length(NULL) { } + + Location get_stack_location() { return _location; } + ConstantIntValue* get_field_length() { return _field_length; } + + bool is_stack_object(){ return true; } + + // Serialization of debugging information + void read_object(DebugInfoReadStream* stream); + void write_on(DebugInfoWriteStream* stream); +}; + + class ConstantLongValue: public ScopeValue { private: jlong _value; public: ConstantLongValue(jlong value) { _value = value; } @@ -302,11 +323,11 @@ Method* o = (Method*)(code()->metadata_at(read_int())); // is_metadata() is a faster check than is_metaspace_object() assert(o == NULL || o->is_metadata(), "meta data only"); return o; } - ScopeValue* read_object_value(bool is_auto_box); + ScopeValue* read_object_value(int type); ScopeValue* get_cached_object(); // BCI encoding is mostly unsigned, but -1 is a distinguished value int read_bci() { return read_int() + InvocationEntryBci; } }; diff a/src/hotspot/share/compiler/compilerDefinitions.cpp b/src/hotspot/share/compiler/compilerDefinitions.cpp --- a/src/hotspot/share/compiler/compilerDefinitions.cpp +++ b/src/hotspot/share/compiler/compilerDefinitions.cpp @@ -520,10 +520,19 @@ } if (FLAG_IS_DEFAULT(LoopStripMiningIterShortLoop)) { // blind guess LoopStripMiningIterShortLoop = LoopStripMiningIter / 10; } + if (UseStackAllocation) { + if (!(UseSerialGC || UseParallelGC || UseG1GC)) { + vm_exit_during_initialization("UseStackAllocation is not supported with selected GC", GCConfig::hs_err_name()); + FLAG_SET_DEFAULT(UseStackAllocation, false); + FLAG_SET_ERGO(UseStackAllocationRuntime, false); + } else { + FLAG_SET_ERGO(UseStackAllocationRuntime, true); + } + } #endif // COMPILER2 } static CompLevel highest_compile_level() { return TieredCompilation ? 
MIN2((CompLevel) TieredStopAtLevel, CompLevel_highest_tier) : CompLevel_highest_tier; diff a/src/hotspot/share/compiler/compilerDirectives.hpp b/src/hotspot/share/compiler/compilerDirectives.hpp --- a/src/hotspot/share/compiler/compilerDirectives.hpp +++ b/src/hotspot/share/compiler/compilerDirectives.hpp @@ -67,11 +67,15 @@ NOT_PRODUCT(cflags(IGVPrintLevel, intx, PrintIdealGraphLevel, IGVPrintLevel)) \ cflags(TraceSpilling, bool, TraceSpilling, TraceSpilling) \ cflags(Vectorize, bool, false, Vectorize) \ cflags(VectorizeDebug, uintx, 0, VectorizeDebug) \ cflags(CloneMapDebug, bool, false, CloneMapDebug) \ - cflags(MaxNodeLimit, intx, MaxNodeLimit, MaxNodeLimit) + cflags(MaxNodeLimit, intx, MaxNodeLimit, MaxNodeLimit) \ +NOT_PRODUCT(cflags(PrintEscapeAnalysis, bool, PrintEscapeAnalysis, PrintEscapeAnalysis)) \ +NOT_PRODUCT(cflags(PrintEliminateAllocations, bool, PrintEliminateAllocations, PrintEliminateAllocations)) \ + cflags(UseStackAllocation, bool, UseStackAllocation, UseStackAllocation) \ +NOT_PRODUCT(cflags(PrintStackAllocation, bool, PrintStackAllocation, PrintStackAllocation)) #else #define compilerdirectives_c2_flags(cflags) #endif class CompilerDirectives; diff a/src/hotspot/share/compiler/oopMap.cpp b/src/hotspot/share/compiler/oopMap.cpp --- a/src/hotspot/share/compiler/oopMap.cpp +++ b/src/hotspot/share/compiler/oopMap.cpp @@ -28,17 +28,18 @@ #include "code/nmethod.hpp" #include "code/scopeDesc.hpp" #include "compiler/oopMap.hpp" #include "gc/shared/collectedHeap.hpp" #include "memory/allocation.inline.hpp" -#include "memory/iterator.hpp" +#include "memory/iterator.inline.hpp" #include "memory/resourceArea.hpp" #include "memory/universe.hpp" #include "oops/compressedOops.hpp" #include "runtime/frame.inline.hpp" #include "runtime/handles.inline.hpp" #include "runtime/signature.hpp" +#include "runtime/vframe_hp.hpp" #include "utilities/align.hpp" #include "utilities/lockFreeStack.hpp" #ifdef COMPILER1 #include "c1/c1_Defs.hpp" #endif @@ -269,16 +270,28 @@ // equal to CompressedOops::base() when a narrow oop // implicit null check is used in compiled code. // The narrow_oop_base could be NULL or be the address // of the page below heap depending on compressed oops mode. if (base_loc != NULL && *base_loc != NULL && !CompressedOops::is_base(*base_loc)) { + + if (UseStackAllocationRuntime) { + intptr_t *stack_base = fr->unextended_sp(); + intptr_t *stack_top = stack_base + cb->frame_size(); + intptr_t *oop_ptr = cast_from_oop(*base_loc); + if ((stack_base <= oop_ptr) && (oop_ptr < stack_top)) { + // If the base is a stack oop just continue because stack oops will not move + continue; + } + } + derived_oop_fn(base_loc, derived_loc); } } } { + GrowableArray stack_oops; // We want coop and oop oop_types for (OopMapStream oms(map); !oms.is_done(); oms.next()) { OopMapValue omv = oms.current(); oop* loc = fr->oopmapreg_to_location(omv.reg(),reg_map); // It should be an error if no location can be found for a @@ -294,10 +307,47 @@ // implicit null check is used in compiled code. // The narrow_oop_base could be NULL or be the address // of the page below heap depending on compressed oops mode. continue; } + + // TODO can we check if a CodeBlob includes stack allocated objects? + // If macro.cpp tags the compilation as including stack allocated objects + // then it should be possible to set something on codeblob. 
+ if (UseStackAllocationRuntime) { + intptr_t *base = fr->unextended_sp(); + intptr_t *top = base + cb->frame_size(); + intptr_t *oop_ptr = cast_from_oop(val); + // If a stack slot points to a stack allocated object handle it + if ((base <= oop_ptr) && (oop_ptr < top)) { + // If we are verifying the stack, do extra checking that this + // stack location is indeed one of the stack allocated objects we + // have described in the oop maps. + if (VerifyStack) { + Thread* current_thread = Thread::current(); + ResourceMark rm(current_thread); + HandleMark hm(current_thread); + + vframe* vf = vframe::new_vframe(fr, reg_map, reg_map->thread()); + if (vf->is_compiled_frame()) { + compiledVFrame* cvf = compiledVFrame::cast(vf); + GrowableArray* objects = cvf->scope()->objects(); + + // Match the stack location offset to any described + // stack allocated objects. + // In case we didn't find this location in our described objects + // we just continue, it's not really a stack oop. + if (cvf->match_object_to_stack_oop(oop_ptr, base, objects) == NULL) { + continue; + } + } + } + + OopMapSet::stack_oop_do(loc, oop_fn, &stack_oops, base, top); + continue; + } + } #ifdef ASSERT if ((((uintptr_t)loc & (sizeof(*loc)-1)) != 0) || !Universe::heap()->is_in_or_null(*loc)) { tty->print_cr("# Found non oop pointer. Dumping state at failure"); // try to dump out some helpful debugging information @@ -326,10 +376,59 @@ } } } } +class OopClosureWalker: public BasicOopIterateClosure { +protected: + OopClosure *_closure; + GrowableArray *_stack_oops; + intptr_t *_base; + intptr_t *_top; + +public: + OopClosureWalker(OopClosure *closure, GrowableArray *stack_oops, intptr_t *base, intptr_t *top) : + BasicOopIterateClosure(NULL), + _closure(closure), + _stack_oops(stack_oops), + _base(base), + _top(top) {} + + void do_oop(oop *o) { + intptr_t *oop_ptr = cast_from_oop(*o); + if ((_base <= oop_ptr) && (oop_ptr < _top)) { + OopMapSet::stack_oop_do(o, _closure, _stack_oops, _base, _top); + } else { + _closure->do_oop(o); + } + } + void do_oop(narrowOop *o) { + oop obj = RawAccess<>::oop_load(o); + intptr_t *oop_ptr = cast_from_oop(obj); + if ((_base <= oop_ptr) && (oop_ptr < _top)) { + // no references to stack allocated oops in UseCompressedOops + assert(false, "unreachable"); + } else { + _closure->do_oop(o); + } + } + + debug_only(virtual bool should_verify_oops() { return false; }) +}; + +void OopMapSet::stack_oop_do(oop *p, OopClosure* oop_fn, GrowableArray *stack_oops, intptr_t *stack_base, intptr_t *stack_top) { + oop o = RawAccess::oop_load(p); + Klass *t = o->klass(); + assert(t->is_klass(), "Has to be a class"); + if (!t->is_typeArray_klass()) { + if (stack_oops->append_if_missing(o)) { + OopClosureWalker walk_elements(oop_fn, stack_oops, stack_base, stack_top); + o->oop_iterate(&walk_elements); + } + } +} + // Update callee-saved register info for the following frame void OopMapSet::update_register_map(const frame *fr, RegisterMap *reg_map) { ResourceMark rm; CodeBlob* cb = fr->cb(); diff a/src/hotspot/share/compiler/oopMap.hpp b/src/hotspot/share/compiler/oopMap.hpp --- a/src/hotspot/share/compiler/oopMap.hpp +++ b/src/hotspot/share/compiler/oopMap.hpp @@ -197,15 +197,20 @@ }; class OopMapSet : public ResourceObj { friend class VMStructs; + friend class OopClosureWalker; private: GrowableArray _list; void add(OopMap* value) { _list.append(value); } + static void stack_oop_do(oop *p, OopClosure* oop_fn, + GrowableArray *stack_oops, + intptr_t *stack_base, intptr_t *stack_top); + public: OopMapSet(); // returns the 
number of OopMaps in this OopMapSet int size() const { return _list.length(); } diff a/src/hotspot/share/gc/g1/c2/g1BarrierSetC2.cpp b/src/hotspot/share/gc/g1/c2/g1BarrierSetC2.cpp --- a/src/hotspot/share/gc/g1/c2/g1BarrierSetC2.cpp +++ b/src/hotspot/share/gc/g1/c2/g1BarrierSetC2.cpp @@ -410,10 +410,14 @@ IdealKit ideal(kit, true); Node* tls = __ thread(); // ThreadLocalStorage + BarrierSet* bs = BarrierSet::barrier_set(); + CardTableBarrierSet* ctbs = barrier_set_cast(bs); + CardTable* ct = ctbs->card_table(); + Node* no_base = __ top(); float likely = PROB_LIKELY_MAG(3); float unlikely = PROB_UNLIKELY_MAG(3); Node* young_card = __ ConI((jint)G1CardTable::g1_young_card_val()); Node* dirty_card = __ ConI((jint)G1CardTable::dirty_card_val()); @@ -457,40 +461,66 @@ Node* xor_res = __ URShiftX ( __ XorX( cast, __ CastPX(__ ctrl(), val)), __ ConI(HeapRegion::LogOfHRGrainBytes)); // if (xor_res == 0) same region so skip __ if_then(xor_res, BoolTest::ne, zeroX, likely); { - // No barrier if we are storing a NULL - __ if_then(val, BoolTest::ne, kit->null(), likely); { + // if ((unsigned)(card_offset - low_map_offset) >= (high_map_offset - low_map_offset)) stack allocated object, so skip + if (kit->C->do_stack_allocation()) { + state()->add_enqueue_barrier(static_cast(cast)); + Node* low_off = kit->longcon(ct->byte_map_bottom_offset()); + Node* delta_off = kit->longcon(ct->byte_map_top_offset() - ct->byte_map_bottom_offset()); + Node* sub_off = __ SubL(cast, low_off); - // Ok must mark the card if not already dirty + __ uif_then(sub_off, BoolTest::le, delta_off, likely); } { - // load the original value of the card - Node* card_val = __ load(__ ctrl(), card_adr, TypeInt::INT, T_BYTE, Compile::AliasIdxRaw); + // No barrier if we are storing a NULL + __ if_then(val, BoolTest::ne, kit->null(), likely); { + + // Ok must mark the card if not already dirty + + // load the original value of the card + Node* card_val = __ load(__ ctrl(), card_adr, TypeInt::INT, T_BYTE, Compile::AliasIdxRaw); - __ if_then(card_val, BoolTest::ne, young_card, unlikely); { - kit->sync_kit(ideal); - kit->insert_mem_bar(Op_MemBarVolatile, oop_store); - __ sync_kit(kit); + __ if_then(card_val, BoolTest::ne, young_card, unlikely); { + kit->sync_kit(ideal); + kit->insert_mem_bar(Op_MemBarVolatile, oop_store); + __ sync_kit(kit); - Node* card_val_reload = __ load(__ ctrl(), card_adr, TypeInt::INT, T_BYTE, Compile::AliasIdxRaw); - __ if_then(card_val_reload, BoolTest::ne, dirty_card); { - g1_mark_card(kit, ideal, card_adr, oop_store, alias_idx, index, index_adr, buffer, tf); + Node* card_val_reload = __ load(__ ctrl(), card_adr, TypeInt::INT, T_BYTE, Compile::AliasIdxRaw); + __ if_then(card_val_reload, BoolTest::ne, dirty_card); { + g1_mark_card(kit, ideal, card_adr, oop_store, alias_idx, index, index_adr, buffer, tf); + } __ end_if(); + } __ end_if(); } __ end_if(); - } __ end_if(); - } __ end_if(); + } if (kit->C->do_stack_allocation()) { + __ end_if(); + } } __ end_if(); } else { // The Object.clone() intrinsic uses this path if !ReduceInitialCardMarks. // We don't need a barrier here if the destination is a newly allocated object // in Eden. Otherwise, GC verification breaks because we assume that cards in Eden // are set to 'g1_young_gen' (see G1CardTable::verify_g1_young_region()). 
assert(!use_ReduceInitialCardMarks(), "can only happen with card marking"); - Node* card_val = __ load(__ ctrl(), card_adr, TypeInt::INT, T_BYTE, Compile::AliasIdxRaw); - __ if_then(card_val, BoolTest::ne, young_card); { - g1_mark_card(kit, ideal, card_adr, oop_store, alias_idx, index, index_adr, buffer, tf); - } __ end_if(); + + // if ((unsigned)(card_offset - low_map_offset) >= (high_map_offset - low_map_offset)) stack allocated object, so skip + if (kit->C->do_stack_allocation()) { + state()->add_enqueue_barrier(static_cast<CastP2XNode*>(cast)); + Node* low_off = kit->longcon(ct->byte_map_bottom_offset()); + Node* delta_off = kit->longcon(ct->byte_map_top_offset() - ct->byte_map_bottom_offset()); + Node* sub_off = __ SubL(cast, low_off); + + __ uif_then(sub_off, BoolTest::le, delta_off, likely); } { + + Node* card_val = __ load(__ ctrl(), card_adr, TypeInt::INT, T_BYTE, Compile::AliasIdxRaw); + __ if_then(card_val, BoolTest::ne, young_card); { + g1_mark_card(kit, ideal, card_adr, oop_store, alias_idx, index, index_adr, buffer, tf); + } __ end_if(); + + } if (kit->C->do_stack_allocation()) { + __ end_if(); + } } // Final sync IdealKit and GraphKit. kit->final_sync(ideal); } @@ -659,17 +689,125 @@ } return strcmp(call->_name, "write_ref_field_pre_entry") == 0 || strcmp(call->_name, "write_ref_field_post_entry") == 0; } +bool G1BarrierSetC2::process_barrier_node(Node* node, PhaseIterGVN& igvn) const { + assert(node->Opcode() == Op_CastP2X, "ConvP2XNode required"); + + // Must have a control node + if (node->in(0) == NULL) { + return false; + } + + // Search for the CastP2X->Xor->URShift->Cmp path which + // checks if the store is done to a region different from the value's region. + Node* xorx = node->find_out_with(Op_XorX); + BoolNode* bool_node = NULL; + + if (xorx != NULL) { + + Node* shift = xorx->unique_out(); + Node* cmpx = shift->unique_out(); + + assert(cmpx->is_Cmp() && cmpx->unique_out()->is_Bool() && + cmpx->unique_out()->as_Bool()->_test._test == BoolTest::ne, + "missing region check in G1 post barrier"); + + Node* bol = cmpx->unique_out(); + assert(bol->unique_out()->is_If(), "should find if after the bool node"); + Node* if_node = bol->unique_out(); + Node* if_true = if_node->find_out_with(Op_IfTrue); + assert(if_true != NULL, "there should be a true projection"); + + Node* iff_check = if_true->find_out_with(Op_If); + // Not a barrier with bound check + if (iff_check == NULL) { + return false; + } + + Node* iff_check_in_1_node = iff_check->in(1); + if (!iff_check_in_1_node->is_Bool()) { + return false; + } + bool_node = iff_check_in_1_node->as_Bool(); + + } else { + // this "could" be the path followed when !use_ReduceInitialCardMarks() is + // used or when the two sides of the barrier are scalar replaced + //assert(false, "we managed to get here!!! 
process_barrier_node"); + Node *addl_node = node->find_out_with(Op_AddL); + if (addl_node == NULL) { + return false; + } + + Node* cmpx = addl_node->unique_out(); + assert(cmpx->is_Cmp() && cmpx->unique_out()->is_Bool() && + cmpx->unique_out()->as_Bool()->_test._test == BoolTest::le, + "missing region check in G1 post barrier"); + + bool_node = cmpx->unique_out()->as_Bool(); + } + + if (bool_node->_test._test != BoolTest::le) { + return false; + } + + // the input to the bool is the CMPX + Node* bool_node_in_1_node = bool_node->in(1); + if (!bool_node_in_1_node->is_Cmp()) { + return false; + } + CmpNode* cmp_node = bool_node_in_1_node->as_Cmp(); + + // the input to the CMPX is the card_table_top_offset constant + Node* cmp_node_in_2_node = cmp_node->in(2); + if (!cmp_node_in_2_node->is_Con()) { + return false; + } + + BarrierSet* bs = BarrierSet::barrier_set(); + CardTableBarrierSet* ctbs = barrier_set_cast(bs); + CardTable* ct = ctbs->card_table(); + size_t constant = ct->byte_map_top_offset() - ct->byte_map_bottom_offset(); + + // Check that the input to this CMP node is the expected constant + const TypeX* otype = cmp_node_in_2_node->find_intptr_t_type(); + if (otype != NULL && otype->is_con() && + size_t(otype->get_con()) != constant) { + // Constant offset but not the card table size constant so just return + return false; + } + + // we can't change the compare or the constant so create a new constant(0) and replace the variable + Node* cmp_node_in_1_node = cmp_node->in(1); + ConNode* zeroConstant_node = igvn.makecon(TypeX_ZERO); + if (cmp_node_in_1_node->_idx == zeroConstant_node->_idx) { + // we can get here via different nodes - but we only want to change the input once + return false; + } + + igvn.rehash_node_delayed(cmp_node); + int numReplaced = cmp_node->replace_edge(cmp_node_in_1_node, zeroConstant_node); + assert(numReplaced == 1, "Failed to replace the card_offset with Conx(0)"); + return true; +} + void G1BarrierSetC2::eliminate_gc_barrier(PhaseMacroExpand* macro, Node* node) const { assert(node->Opcode() == Op_CastP2X, "ConvP2XNode required"); - assert(node->outcnt() <= 2, "expects 1 or 2 users: Xor and URShift nodes"); + assert(node->outcnt() <= 3, "expects 1, 2 or 3 users: Xor, URShift and SubL nodes"); // It could be only one user, URShift node, in Object.clone() intrinsic // but the new allocation is passed to arraycopy stub and it could not // be scalar replaced. So we don't check the case. + // Certain loop optimisations may introduce a CastP2X node with + // ConvL2I in case of an AllocateArray op. Check for that case + // here and do not attempt to eliminate it as write barrier. + if (macro->C->do_stack_allocation() && !state()->is_a_barrier(static_cast(node))) { + return; + } + // An other case of only one user (Xor) is when the value check for NULL // in G1 post barrier is folded after CCP so the code which used URShift // is removed. // Take Region node before eliminating post barrier since it also @@ -718,11 +856,18 @@ } } } } } else { - assert(!use_ReduceInitialCardMarks(), "can only happen with card marking"); + // In a scenario where the two sides of the barrier are scalar replaced + // or stack allocated, the XorX node will be visited more than once, because + // both edges will be CastP2X nodes from two distinct allocates. In certain + // instances, the removal of the CastP2X node will result in removal of the + // XorX node, causing the assert below to be hit when eliminate_gc_barrier is + // called for the second node. 
+ // assert(!use_ReduceInitialCardMarks(), "can only happen with card marking"); + // This is a G1 post barrier emitted by the Object.clone() intrinsic. // Search for the CastP2X->URShiftX->AddP->LoadB->Cmp path which checks if the card // is marked as young_gen and replace the Cmp with 0 (false) to collapse the barrier. Node* shift = node->find_out_with(Op_URShiftX); assert(shift != NULL, "missing G1 post barrier"); @@ -736,12 +881,16 @@ macro->replace_node(cmpx, macro->makecon(TypeInt::CC_EQ)); // There is no G1 pre barrier in this case } // Now CastP2X can be removed since it is used only on dead path // which currently still alive until igvn optimize it. - assert(node->outcnt() == 0 || node->unique_out()->Opcode() == Op_URShiftX, ""); + // TODO: fix this following assert becuase of SUBL + // assert(node->outcnt() == 0 || node->unique_out()->Opcode() == Op_URShiftX, ""); macro->replace_node(node, macro->top()); + + // Remove this node from our state + state()->remove_enqueue_barrier(static_cast(node)); } Node* G1BarrierSetC2::step_over_gc_barrier(Node* c) const { if (!use_ReduceInitialCardMarks() && c != NULL && c->is_Region() && c->req() == 3) { diff a/src/hotspot/share/gc/g1/c2/g1BarrierSetC2.hpp b/src/hotspot/share/gc/g1/c2/g1BarrierSetC2.hpp --- a/src/hotspot/share/gc/g1/c2/g1BarrierSetC2.hpp +++ b/src/hotspot/share/gc/g1/c2/g1BarrierSetC2.hpp @@ -86,10 +86,12 @@ public: virtual bool is_gc_barrier_node(Node* node) const; virtual void eliminate_gc_barrier(PhaseMacroExpand* macro, Node* node) const; virtual Node* step_over_gc_barrier(Node* c) const; + virtual bool process_barrier_node(Node* cast_node, PhaseIterGVN& igvn) const; + #ifdef ASSERT virtual void verify_gc_barriers(Compile* compile, CompilePhase phase) const; #endif diff a/src/hotspot/share/gc/parallel/psParallelCompact.inline.hpp b/src/hotspot/share/gc/parallel/psParallelCompact.inline.hpp --- a/src/hotspot/share/gc/parallel/psParallelCompact.inline.hpp +++ b/src/hotspot/share/gc/parallel/psParallelCompact.inline.hpp @@ -112,15 +112,15 @@ oop obj = CompressedOops::decode_not_null(heap_oop); assert(ParallelScavengeHeap::heap()->is_in(obj), "should be in heap"); oop new_obj = (oop)summary_data().calc_new_pointer(obj, cm); assert(new_obj != NULL, // is forwarding ptr? - "should be forwarded"); + "should be forwarded"); // Just always do the update unconditionally? if (new_obj != NULL) { assert(ParallelScavengeHeap::heap()->is_in_reserved(new_obj), - "should be in object space"); + "should be in object space"); RawAccess::oop_store(p, new_obj); } } } diff a/src/hotspot/share/gc/shared/c2/barrierSetC2.hpp b/src/hotspot/share/gc/shared/c2/barrierSetC2.hpp --- a/src/hotspot/share/gc/shared/c2/barrierSetC2.hpp +++ b/src/hotspot/share/gc/shared/c2/barrierSetC2.hpp @@ -271,10 +271,11 @@ // This could for example comprise macro nodes to be expanded during macro expansion. virtual void* create_barrier_state(Arena* comp_arena) const { return NULL; } // If the BarrierSetC2 state has barrier nodes in its compilation // unit state to be expanded later, then now is the time to do so. 
virtual bool expand_barriers(Compile* C, PhaseIterGVN& igvn) const { return false; } + virtual bool process_barrier_node(Node* cast_node, PhaseIterGVN& igvn) const { return false; } virtual bool optimize_loops(PhaseIdealLoop* phase, LoopOptsMode mode, VectorSet& visited, Node_Stack& nstack, Node_List& worklist) const { return false; } virtual bool strip_mined_loops_expanded(LoopOptsMode mode) const { return false; } virtual bool is_gc_specific_loop_opts_pass(LoopOptsMode mode) const { return false; } enum CompilePhase { diff a/src/hotspot/share/gc/shared/c2/cardTableBarrierSetC2.cpp b/src/hotspot/share/gc/shared/c2/cardTableBarrierSetC2.cpp --- a/src/hotspot/share/gc/shared/c2/cardTableBarrierSetC2.cpp +++ b/src/hotspot/share/gc/shared/c2/cardTableBarrierSetC2.cpp @@ -29,10 +29,11 @@ #include "gc/shared/c2/cardTableBarrierSetC2.hpp" #include "opto/arraycopynode.hpp" #include "opto/graphKit.hpp" #include "opto/idealKit.hpp" #include "opto/macro.hpp" +#include "opto/rootnode.hpp" #include "utilities/macros.hpp" #define __ ideal. Node* CardTableBarrierSetC2::byte_map_base_node(GraphKit* kit) const { @@ -55,12 +56,10 @@ Node* adr, uint adr_idx, Node* val, BasicType bt, bool use_precise) const { - CardTableBarrierSet* ctbs = barrier_set_cast(BarrierSet::barrier_set()); - CardTable* ct = ctbs->card_table(); // No store check needed if we're storing a NULL or an old object // (latter case is probably a string constant). The concurrent // mark sweep garbage collector, however, needs to have all nonNull // oop updates flagged via card-marks. if (val != NULL && val->is_Con()) { @@ -88,10 +87,16 @@ // (Else it's an array (or unknown), and we want more precise card marks.) assert(adr != NULL, ""); IdealKit ideal(kit, true); + BarrierSet* bs = BarrierSet::barrier_set(); + CardTableBarrierSet* ctbs = barrier_set_cast(bs); + CardTable* ct = ctbs->card_table(); + + float likely = PROB_LIKELY_MAG(3); + // Convert the pointer to an int prior to doing math on it Node* cast = __ CastPX(__ ctrl(), adr); // Divide by card size Node* card_offset = __ URShiftX( cast, __ ConI(CardTable::card_shift) ); @@ -101,35 +106,48 @@ // Get the alias_index for raw card-mark memory int adr_type = Compile::AliasIdxRaw; Node* zero = __ ConI(0); // Dirty card value - if (UseCondCardMark) { - if (ct->scanned_concurrently()) { - kit->insert_mem_bar(Op_MemBarVolatile, oop_store); - __ sync_kit(kit); - } - // The classic GC reference write barrier is typically implemented - // as a store into the global card mark table. Unfortunately - // unconditional stores can result in false sharing and excessive - // coherence traffic as well as false transactional aborts. - // UseCondCardMark enables MP "polite" conditional card mark - // stores. In theory we could relax the load from ctrl() to - // no_ctrl, but that doesn't buy much latitude. 
- Node* card_val = __ load( __ ctrl(), card_adr, TypeInt::BYTE, T_BYTE, adr_type); - __ if_then(card_val, BoolTest::ne, zero); - } - - // Smash zero into card - if(!ct->scanned_concurrently()) { - __ store(__ ctrl(), card_adr, zero, T_BYTE, adr_type, MemNode::unordered); - } else { - // Specialized path for CM store barrier - __ storeCM(__ ctrl(), card_adr, zero, oop_store, adr_idx, T_BYTE, adr_type); - } - - if (UseCondCardMark) { + if (kit->C->do_stack_allocation()) { + // Stack allocation: cache CastP2XNode for later processing + state()->add_enqueue_barrier(static_cast(cast)); + + Node* low_off = kit->longcon(ct->byte_map_bottom_offset()); + Node* delta_off = kit->longcon(ct->byte_map_top_offset() - ct->byte_map_bottom_offset()); + Node* sub_off = __ SubL(cast, low_off); + + __ uif_then(sub_off, BoolTest::le, delta_off, likely); } { + + if (UseCondCardMark) { + if (ct->scanned_concurrently()) { + kit->insert_mem_bar(Op_MemBarVolatile, oop_store); + __ sync_kit(kit); + } + // The classic GC reference write barrier is typically implemented + // as a store into the global card mark table. Unfortunately + // unconditional stores can result in false sharing and excessive + // coherence traffic as well as false transactional aborts. + // UseCondCardMark enables MP "polite" conditional card mark + // stores. In theory we could relax the load from ctrl() to + // no_ctrl, but that doesn't buy much latitude. + Node* card_val = __ load( __ ctrl(), card_adr, TypeInt::BYTE, T_BYTE, adr_type); + __ if_then(card_val, BoolTest::ne, zero); + } + + // Smash zero into card + if(!ct->scanned_concurrently()) { + __ store(__ ctrl(), card_adr, zero, T_BYTE, adr_type, MemNode::unordered); + } else { + // Specialized path for CM store barrier + __ storeCM(__ ctrl(), card_adr, zero, oop_store, adr_idx, T_BYTE, adr_type); + } + + if (UseCondCardMark) { + __ end_if(); + } + } if (kit->C->do_stack_allocation()) { __ end_if(); } // Final sync IdealKit and GraphKit. 
kit->final_sync(ideal); @@ -166,13 +184,76 @@ bool CardTableBarrierSetC2::is_gc_barrier_node(Node* node) const { return ModRefBarrierSetC2::is_gc_barrier_node(node) || node->Opcode() == Op_StoreCM; } +bool CardTableBarrierSetC2::process_barrier_node(Node* node, PhaseIterGVN& igvn) const { + assert(node->Opcode() == Op_CastP2X, "ConvP2XNode required"); + + // Must have a control node + if (node->in(0) == NULL) { + return false; + } + + Node *addx_node = node->find_out_with(Op_AddX); + if (addx_node == NULL) { + return false; + } + + Node *addx_out = addx_node->unique_out(); + if (addx_out == NULL) { + return false; + } + + CmpNode* cmp_node = addx_out->as_Cmp(); + // the input to the CMPX is the card_table_top_offset constant + Node* cmp_node_in_2_node = cmp_node->in(2); + if (!cmp_node_in_2_node->is_Con()) { + return false; + } + + BarrierSet* bs = BarrierSet::barrier_set(); + CardTableBarrierSet* ctbs = barrier_set_cast(bs); + CardTable* ct = ctbs->card_table(); + size_t constant = ct->byte_map_top_offset() - ct->byte_map_bottom_offset(); + + // Check that the input to this CMP node is the expected constant + const TypeX* otype = cmp_node_in_2_node->find_intptr_t_type(); + if (otype != NULL && otype->is_con() && + size_t(otype->get_con()) != constant) { + // Constant offset but not the card table size constant so just return + return false; + } + + // we can't change the compare or the constant so create a new constant(0) and replace the variable + Node* cmp_node_in_1_node = cmp_node->in(1); + ConNode* zeroConstant_node = igvn.makecon(TypeX_ZERO); + if (cmp_node_in_1_node->_idx == zeroConstant_node->_idx) { + // we can get here via different nodes - but we only want to change the input once + return false; + } + + igvn.rehash_node_delayed(cmp_node); + int numReplaced = cmp_node->replace_edge(cmp_node_in_1_node, zeroConstant_node); + assert(numReplaced == 1, "Failed to replace the card_offset with Conx(0)"); + igvn.replace_node(addx_node, igvn.C->top()); + + return true; +} + void CardTableBarrierSetC2::eliminate_gc_barrier(PhaseMacroExpand* macro, Node* node) const { assert(node->Opcode() == Op_CastP2X, "ConvP2XNode required"); - Node *shift = node->unique_out(); + assert(node->outcnt() <= 2, "node->outcnt() <= 2"); + + // Certain loop optimisations may introduce a CastP2X node with + // ConvL2I in case of an AllocateArray op. Check for that case + // here and do not attempt to eliminate it as write barrier. 
+ if (macro->C->do_stack_allocation() && !state()->is_a_barrier(static_cast(node))) { + return; + } + + Node *shift = node->find_out_with(Op_URShiftX); Node *addp = shift->unique_out(); for (DUIterator_Last jmin, j = addp->last_outs(jmin); j >= jmin; --j) { Node *mem = addp->last_out(j); if (UseCondCardMark && mem->is_Load()) { assert(mem->Opcode() == Op_LoadB, "unexpected code shape"); @@ -182,11 +263,126 @@ continue; } assert(mem->is_Store(), "store required"); macro->replace_node(mem, mem->in(MemNode::Memory)); } + + if (macro->C->do_stack_allocation()) { + Node *addl_node = node->find_out_with(Op_AddL); + assert(addl_node != NULL, "stackallocation expects addl"); + + Node* cmp_node = addl_node->unique_out(); + assert(cmp_node != NULL && cmp_node->is_Cmp(), "expected unique cmp node"); + + macro->replace_node(cmp_node, macro->makecon(TypeInt::CC_EQ)); + } + + // Stack allocation: remove this node from our cache so we don't process it later + state()->remove_enqueue_barrier(static_cast(node)); } bool CardTableBarrierSetC2::array_copy_requires_gc_barriers(bool tightly_coupled_alloc, BasicType type, bool is_clone, ArrayCopyPhase phase) const { bool is_oop = is_reference_type(type); return is_oop && (!tightly_coupled_alloc || !use_ReduceInitialCardMarks()); } + +bool CardTableBarrierSetC2::expand_barriers(Compile* C, PhaseIterGVN& igvn) const { + // We need to process write barriers for extra checks in case we have stack allocation on + if (C->do_stack_allocation()) { + BarrierSetC2State* set_state = state(); + + for (int i = 0; i < set_state->enqueue_barriers_count(); i++) { + Node* n = set_state->enqueue_barrier(i); + process_barrier_node(n, igvn); + } + + if (set_state->enqueue_barriers_count()) { + // this kicks in the dead code elimination we need to remove the redundant check + igvn.optimize(); + } + } + + return false; +} + +Node* CardTableBarrierSetC2::step_over_gc_barrier(Node* c) const { + if (Compile::current()->do_stack_allocation() && + !use_ReduceInitialCardMarks() && + c != NULL && c->is_Region() && c->req() == 3) { + + // [Proj] <----------- step over to here and return + // | + // ----------- + // / \ + // / \ + // / [CastP2X] + // | / + // | [AddL] + // | / + // | [CmpUL] + // | / + // \ [Bool] + // \ / + // [If] + // / \ + // [IfFalse] [IfTrue] + // \ / + // [Region] <---------------- c node + + Node* if_bool = c->in(1); + assert(if_bool->is_IfTrue() || if_bool->is_IfFalse(), "Invalid gc graph pattern"); + Node* if_node = if_bool->in(0); + Node* proj_node = if_node->in(0); + assert(proj_node->is_Proj(), "Invalid gc graph pattern"); + return proj_node; + } + return c; +} + +void CardTableBarrierSetC2::register_potential_barrier_node(Node* node) const { + if (node->Opcode() == Op_CastP2X) { + state()->add_enqueue_barrier(static_cast(node)); + } +} + +void CardTableBarrierSetC2::unregister_potential_barrier_node(Node* node) const { + if (node->Opcode() == Op_CastP2X) { + state()->remove_enqueue_barrier(static_cast(node)); + } +} + +BarrierSetC2State* CardTableBarrierSetC2::state() const { + BarrierSetC2State* ret = reinterpret_cast(Compile::current()->barrier_set_state()); + assert(ret != NULL, "Sanity"); + return ret; +} + +void* CardTableBarrierSetC2::create_barrier_state(Arena* comp_arena) const { + return new(comp_arena) BarrierSetC2State(comp_arena); +} + +BarrierSetC2State::BarrierSetC2State(Arena* comp_arena) + : _enqueue_barriers(new (comp_arena) GrowableArray(comp_arena, 8, 0, NULL)) { +} + +int BarrierSetC2State::enqueue_barriers_count() const { + return 
_enqueue_barriers->length(); +} + +CastP2XNode* BarrierSetC2State::enqueue_barrier(int idx) const { + return _enqueue_barriers->at(idx); +} + +void BarrierSetC2State::add_enqueue_barrier(CastP2XNode* n) { + assert(!_enqueue_barriers->contains(n), "duplicate entry in barrier list"); + _enqueue_barriers->append(n); +} + +void BarrierSetC2State::remove_enqueue_barrier(CastP2XNode* n) { + if (_enqueue_barriers->contains(n)) { + _enqueue_barriers->remove(n); + } +} + +bool BarrierSetC2State::is_a_barrier(CastP2XNode* n) { + return _enqueue_barriers->contains(n); +} diff a/src/hotspot/share/gc/shared/c2/cardTableBarrierSetC2.hpp b/src/hotspot/share/gc/shared/c2/cardTableBarrierSetC2.hpp --- a/src/hotspot/share/gc/shared/c2/cardTableBarrierSetC2.hpp +++ b/src/hotspot/share/gc/shared/c2/cardTableBarrierSetC2.hpp @@ -24,10 +24,28 @@ #ifndef SHARE_GC_SHARED_C2_CARDTABLEBARRIERSETC2_HPP #define SHARE_GC_SHARED_C2_CARDTABLEBARRIERSETC2_HPP #include "gc/shared/c2/modRefBarrierSetC2.hpp" +#include "utilities/growableArray.hpp" + +class CastP2XNode; + +class BarrierSetC2State : public ResourceObj { +private: + GrowableArray* _enqueue_barriers; + +public: + BarrierSetC2State(Arena* comp_arena); + + int enqueue_barriers_count() const; + CastP2XNode* enqueue_barrier(int idx) const; + void add_enqueue_barrier(CastP2XNode* n); + void remove_enqueue_barrier(CastP2XNode* n); + bool is_a_barrier(CastP2XNode* n); +}; + class CardTableBarrierSetC2: public ModRefBarrierSetC2 { protected: virtual void post_barrier(GraphKit* kit, Node* ctl, @@ -44,10 +62,19 @@ public: virtual void clone(GraphKit* kit, Node* src, Node* dst, Node* size, bool is_array) const; virtual bool is_gc_barrier_node(Node* node) const; virtual void eliminate_gc_barrier(PhaseMacroExpand* macro, Node* node) const; virtual bool array_copy_requires_gc_barriers(bool tightly_coupled_alloc, BasicType type, bool is_clone, ArrayCopyPhase phase) const; + virtual bool process_barrier_node(Node* cast_node, PhaseIterGVN& igvn) const; + virtual Node* step_over_gc_barrier(Node* c) const; bool use_ReduceInitialCardMarks() const; + + BarrierSetC2State* state() const; + + virtual void register_potential_barrier_node(Node* node) const; + virtual void unregister_potential_barrier_node(Node* node) const; + virtual bool expand_barriers(Compile* C, PhaseIterGVN& igvn) const; + virtual void* create_barrier_state(Arena* comp_arena) const; }; #endif // SHARE_GC_SHARED_C2_CARDTABLEBARRIERSETC2_HPP diff a/src/hotspot/share/gc/shared/cardTable.hpp b/src/hotspot/share/gc/shared/cardTable.hpp --- a/src/hotspot/share/gc/shared/cardTable.hpp +++ b/src/hotspot/share/gc/shared/cardTable.hpp @@ -246,10 +246,12 @@ // This would be the 0th element of _byte_map, if the heap started at 0x0. // But since the heap starts at some higher address, this points to somewhere // before the beginning of the actual _byte_map. 
CardValue* byte_map_base() const { return _byte_map_base; } bool scanned_concurrently() const { return _scanned_concurrently; } + size_t byte_map_top_offset() const { return uintptr_t(_whole_heap.end()); } + size_t byte_map_bottom_offset() const { return uintptr_t(_whole_heap.start()); } virtual bool is_in_young(oop obj) const = 0; // Print a description of the memory for the card table virtual void print_on(outputStream* st) const; diff a/src/hotspot/share/memory/iterator.cpp b/src/hotspot/share/memory/iterator.cpp --- a/src/hotspot/share/memory/iterator.cpp +++ b/src/hotspot/share/memory/iterator.cpp @@ -24,10 +24,11 @@ #include "precompiled.hpp" #include "code/nmethod.hpp" #include "memory/iterator.inline.hpp" #include "oops/oop.inline.hpp" +#include "runtime/fieldDescriptor.inline.hpp" #include "utilities/debug.hpp" #include "utilities/globalDefinitions.hpp" DoNothingClosure do_nothing_cl; diff a/src/hotspot/share/opto/block.cpp b/src/hotspot/share/opto/block.cpp --- a/src/hotspot/share/opto/block.cpp +++ b/src/hotspot/share/opto/block.cpp @@ -175,11 +175,11 @@ } // Ideal nodes are allowable in empty blocks: skip them Only MachNodes // turn directly into code, because only MachNodes have non-trivial // emit() functions. - while ((end_idx > 0) && !get_node(end_idx)->is_Mach()) { + while ((end_idx > 0) && !(get_node(end_idx)->is_Mach() || get_node(end_idx)->is_BoxLock())) { end_idx--; } // No room for any interesting instructions? if (end_idx == 0) { diff a/src/hotspot/share/opto/buildOopMap.cpp b/src/hotspot/share/opto/buildOopMap.cpp --- a/src/hotspot/share/opto/buildOopMap.cpp +++ b/src/hotspot/share/opto/buildOopMap.cpp @@ -239,11 +239,12 @@ Node *def = _defs[reg]; // Get reaching def assert( def, "since live better have reaching def" ); // Classify the reaching def as oop, derived, callee-save, dead, or other const Type *t = def->bottom_type(); - if( t->isa_oop_ptr() ) { // Oop or derived? + if( t->isa_oop_ptr() || // Oop or derived? + (C->do_stack_allocation() && t->isa_rawptr() && def->is_BoxLock())) { // consider stack oops too assert( !OptoReg::is_valid(_callees[reg]), "oop can't be callee save" ); #ifdef _LP64 // 64-bit pointers record oop-ishness on 2 aligned adjacent registers. // Make sure both are record from the same reaching def, but do not // put both into the oopmap. 
diff a/src/hotspot/share/opto/c2_globals.hpp b/src/hotspot/share/opto/c2_globals.hpp --- a/src/hotspot/share/opto/c2_globals.hpp +++ b/src/hotspot/share/opto/c2_globals.hpp @@ -532,10 +532,19 @@ \ product(intx, EliminateAllocationArraySizeLimit, 64, \ "Array size (number of elements) limit for scalar replacement") \ range(0, max_jint) \ \ + experimental(bool, UseStackAllocation, false, \ + "Leverage stack allocation to reduce heap pressure") \ + \ + experimental(bool, UseStackAllocationRuntime, false, \ + "Enable the stack allocation runtime code in oopmap") \ + \ + notproduct(bool, PrintStackAllocation, false, \ + "Print stack allocation debug information") \ + \ product(bool, OptimizePtrCompare, true, \ "Use escape analysis to optimize pointers compare") \ \ notproduct(bool, PrintOptimizePtrCompare, false, \ "Print information about optimized pointers compare") \ diff a/src/hotspot/share/opto/callnode.cpp b/src/hotspot/share/opto/callnode.cpp --- a/src/hotspot/share/opto/callnode.cpp +++ b/src/hotspot/share/opto/callnode.cpp @@ -483,29 +483,48 @@ } st->print("={"); uint nf = spobj->n_fields(); if (nf > 0) { uint first_ind = spobj->first_index(mcall->jvms()); - Node* fld_node = mcall->in(first_ind); + Node* fld_node = NULL; ciField* cifield; if (iklass != NULL) { st->print(" ["); cifield = iklass->nonstatic_field_at(0); cifield->print_name_on(st); - format_helper(regalloc, st, fld_node, ":", 0, &scobjs); + if(spobj->stack_allocated()) { + st->print(":*0]"); + } else { + fld_node = mcall->in(first_ind); + format_helper(regalloc, st, fld_node, ":", 0, &scobjs); + } } else { - format_helper(regalloc, st, fld_node, "[", 0, &scobjs); + if(spobj->stack_allocated()) { + st->print("[*0]"); + } else { + fld_node = mcall->in(first_ind); + format_helper(regalloc, st, fld_node, "[", 0, &scobjs); + } } for (uint j = 1; j < nf; j++) { - fld_node = mcall->in(first_ind+j); if (iklass != NULL) { st->print(", ["); cifield = iklass->nonstatic_field_at(j); cifield->print_name_on(st); - format_helper(regalloc, st, fld_node, ":", j, &scobjs); + if(spobj->stack_allocated()) { + st->print(":*%d]", j); + } else { + fld_node = mcall->in(first_ind+j); + format_helper(regalloc, st, fld_node, ":", j, &scobjs); + } } else { - format_helper(regalloc, st, fld_node, ", [", j, &scobjs); + if(spobj->stack_allocated()) { + st->print(", [*%d]", j); + } else { + fld_node = mcall->in(first_ind+j); + format_helper(regalloc, st, fld_node, ", [", j, &scobjs); + } } } } st->print(" }"); } @@ -959,10 +978,17 @@ return true; } return false; } +bool CallNode::is_call_to_osr_migration_end() const { + if (_name != NULL && strstr(_name, "OSR_migration_end") != 0) { + return true; + } + return false; +} + //============================================================================= uint CallJavaNode::size_of() const { return sizeof(*this); } bool CallJavaNode::cmp( const Node &n ) const { CallJavaNode &call = (CallJavaNode&)n; return CallNode::cmp(call) && _method == call._method && @@ -1300,11 +1326,12 @@ #endif uint first_index, uint n_fields) : TypeNode(tp, 1), // 1 control input -- seems required. Get from root. 
_first_index(first_index), - _n_fields(n_fields) + _n_fields(n_fields), + _is_stack_allocated(false) #ifdef ASSERT , _alloc(alloc) #endif { init_class_id(Class_SafePointScalarObject); @@ -1362,10 +1389,12 @@ { init_class_id(Class_Allocate); init_flags(Flag_is_macro); _is_scalar_replaceable = false; _is_non_escaping = false; + _is_stack_allocateable = false; + _is_referenced_stack_allocation = false; _is_allocation_MemBar_redundant = false; Node *topnode = C->top(); init_req( TypeFunc::Control , ctrl ); init_req( TypeFunc::I_O , abio ); diff a/src/hotspot/share/opto/callnode.hpp b/src/hotspot/share/opto/callnode.hpp --- a/src/hotspot/share/opto/callnode.hpp +++ b/src/hotspot/share/opto/callnode.hpp @@ -492,10 +492,11 @@ class SafePointScalarObjectNode: public TypeNode { uint _first_index; // First input edge relative index of a SafePoint node where // states of the scalarized object fields are collected. // It is relative to the last (youngest) jvms->_scloff. uint _n_fields; // Number of non-static fields of the scalarized object. + bool _is_stack_allocated; DEBUG_ONLY(AllocateNode* _alloc;) virtual uint hash() const ; // { return NO_HASH; } virtual bool cmp( const Node &n ) const; @@ -517,10 +518,13 @@ assert(jvms != NULL, "missed JVMS"); return jvms->scloff() + _first_index; } uint n_fields() const { return _n_fields; } + void set_stack_allocated(bool v) { _is_stack_allocated = true; } + bool stack_allocated() { return _is_stack_allocated; } + #ifdef ASSERT AllocateNode* alloc() const { return _alloc; } #endif virtual uint size_of() const { return sizeof(*this); } @@ -637,10 +641,11 @@ void extract_projections(CallProjections* projs, bool separate_io_proj, bool do_asserts = true); virtual uint match_edge(uint idx) const; bool is_call_to_arraycopystub() const; + bool is_call_to_osr_migration_end() const; #ifndef PRODUCT virtual void dump_req(outputStream *st = tty) const; virtual void dump_spec(outputStream *st) const; #endif @@ -839,10 +844,13 @@ InitialTest, // slow-path test (may be constant) ALength, // array length (or TOP if none) ParmLimit }; + // Maximum object size considered for stack allocation + static const int StackAllocSizeLimit = 0x100; + static const TypeFunc* alloc_type(const Type* t) { const Type** fields = TypeTuple::fields(ParmLimit - TypeFunc::Parms); fields[AllocSize] = TypeInt::POS; fields[KlassNode] = TypeInstPtr::NOTNULL; fields[InitialTest] = TypeInt::BOOL; @@ -860,10 +868,12 @@ } // Result of Escape Analysis bool _is_scalar_replaceable; bool _is_non_escaping; + bool _is_stack_allocateable; + bool _is_referenced_stack_allocation; // True when MemBar for new is redundant with MemBar at initialzer exit bool _is_allocation_MemBar_redundant; virtual uint size_of() const; // Size is bigger AllocateNode(Compile* C, const TypeFunc *atype, Node *ctrl, Node *mem, Node *abio, diff a/src/hotspot/share/opto/compile.cpp b/src/hotspot/share/opto/compile.cpp --- a/src/hotspot/share/opto/compile.cpp +++ b/src/hotspot/share/opto/compile.cpp @@ -902,10 +902,12 @@ env()->set_oop_recorder(new OopRecorder(env()->arena())); env()->set_debug_info(new DebugInformationRecorder(env()->oop_recorder())); env()->set_dependencies(new Dependencies(env())); _fixed_slots = 0; + _stack_allocated_slots = 0; + set_fail_stack_allocation_with_references(false); set_has_split_ifs(false); set_has_loops(has_method() && method()->has_loops()); // first approximation set_has_stringbuilder(false); set_has_boxed_value(false); _trap_can_recompile = false; // no traps emitted yet diff 
a/src/hotspot/share/opto/compile.hpp b/src/hotspot/share/opto/compile.hpp --- a/src/hotspot/share/opto/compile.hpp +++ b/src/hotspot/share/opto/compile.hpp @@ -257,11 +257,11 @@ // Control of this compilation. int _max_inline_size; // Max inline size for this compilation int _freq_inline_size; // Max hot method inline size for this compilation int _fixed_slots; // count of frame slots not allocated by the register - // allocator i.e. locks, original deopt pc, etc. + // allocator i.e. locks, original deopt pc, stack allocated objects, etc. uintx _max_node_limit; // Max unique node count during a single compilation. int _major_progress; // Count of something big happening bool _inlining_progress; // progress doing incremental inlining? bool _inlining_incrementally;// Are we doing incremental inlining (post parse) @@ -298,10 +298,14 @@ // JSR 292 bool _has_method_handle_invokes; // True if this method has MethodHandle invokes. RTMState _rtm_state; // State of Restricted Transactional Memory usage int _loop_opts_cnt; // loop opts round bool _clinit_barrier_on_entry; // True if clinit barrier is needed on nmethod entry + int _stack_allocated_slots; // count of frame slots potentially taken by stack allocated objects. + // Going over the limit disables stack allocation of objects pointing + // to other stack allocated objects. + bool _fail_stack_allocation_with_references; // Compilation environment. Arena _comp_arena; // Arena with lifetime equivalent to Compile void* _barrier_set_state; // Potential GC barrier state for Compile ciEnv* _env; // CI interface @@ -504,11 +508,12 @@ /** Do boxing elimination. */ bool eliminate_boxing() const { return _eliminate_boxing; } /** Do aggressive boxing elimination. */ bool aggressive_unboxing() const { return _eliminate_boxing && AggressiveUnboxing; } bool save_argument_registers() const { return _save_argument_registers; } - + /** Do stack allocation */ + bool do_stack_allocation() const { return UseStackAllocation || _directive->UseStackAllocationOption; } // Other fixed compilation parameters. 
ciMethod* method() const { return _method; } int entry_bci() const { return _entry_bci; } bool is_osr_compilation() const { return _entry_bci != InvocationEntryBci; } @@ -590,10 +595,14 @@ bool profile_rtm() const { return _rtm_state == ProfileRTM; } uint max_node_limit() const { return (uint)_max_node_limit; } void set_max_node_limit(uint n) { _max_node_limit = n; } bool clinit_barrier_on_entry() { return _clinit_barrier_on_entry; } void set_clinit_barrier_on_entry(bool z) { _clinit_barrier_on_entry = z; } + int stack_allocated_slots() const { assert(_stack_allocated_slots >= 0, ""); return _stack_allocated_slots; } + void set_stack_allocated_slots(int n) { _stack_allocated_slots = n; } + bool fail_stack_allocation_with_references() const { return _fail_stack_allocation_with_references; } + void set_fail_stack_allocation_with_references(bool b) { _fail_stack_allocation_with_references = b; } // check the CompilerOracle for special behaviours for this compile bool method_has_option(const char * option) { return method() != NULL && method()->has_option(option); } diff a/src/hotspot/share/opto/escape.cpp b/src/hotspot/share/opto/escape.cpp --- a/src/hotspot/share/opto/escape.cpp +++ b/src/hotspot/share/opto/escape.cpp @@ -45,10 +45,11 @@ _nodes(C->comp_arena(), C->unique(), C->unique(), NULL), _in_worklist(C->comp_arena()), _next_pidx(0), _collecting(true), _verify(false), + _has_locks(false), _compile(C), _igvn(igvn), _node_map(C->comp_arena()) { // Add unknown java object. add_java_object(C->top(), PointsToNode::GlobalEscape); @@ -181,10 +182,15 @@ #endif } else if (n->is_ArrayCopy()) { // Keep a list of ArrayCopy nodes so if one of its input is non // escaping, we can record a unique type arraycopy_worklist.append(n->as_ArrayCopy()); + } else if (n->is_Lock()) { + Node* obj = n->as_Lock()->obj_node()->uncast(); + if (!(obj->is_Parm() || obj->is_Con())) { + _has_locks = true; + } } for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) { Node* m = n->fast_out(i); // Get user ideal_nodes.push(m); } @@ -248,13 +254,56 @@ if (noescape && ptn->scalar_replaceable()) { adjust_scalar_replaceable_state(ptn); if (ptn->scalar_replaceable()) { alloc_worklist.append(ptn->ideal_node()); } + } else { + // Set scalar replaceable to false to for stack allocation analysis below + ptn->set_scalar_replaceable(false); } } + // 4. Perform stack allocation analysis + if (C->do_stack_allocation() && (!_has_locks || (EliminateLocks && EliminateNestedLocks))) { + if (non_escaped_length > 0) { + for (int next = 0; next < non_escaped_length; next++) { + JavaObjectNode* ptn = non_escaped_worklist.at(next); + PointsToNode::EscapeState es = ptn->escape_state(); + assert(es < PointsToNode::GlobalEscape, "list can not contain GlobalEscape objects"); + if (es == PointsToNode::ArgEscape) { +#ifndef PRODUCT + if (print_escape_analysis() || print_stack_allocation()) { + tty->print_cr("---- Alloc node %d can not be stack allocated as it escapes as an argument", ptn->ideal_node()->_idx); + } +#endif + continue; + } + + Node* n = ptn->ideal_node(); + if (!n->is_Allocate()) { + continue; + } + + n->as_Allocate()->_is_stack_allocateable = eligible_for_stack_allocation(ptn); + } + } + + // 4.1 Verify that object chains don't contain heap objects pointing + // to stack allocated objects. Loop until there are changes in the + // state of which objects are allowed to be stack allocated. 
+ bool more_work = non_escaped_length > 0; + while (more_work) { + more_work = verify_stack_allocated_object_chains(non_escaped_worklist, non_escaped_length); + } + +#ifndef PRODUCT + if (print_escape_analysis() || print_stack_allocation()) { + print_stack_allocated_candidates(non_escaped_worklist, non_escaped_length); + } +#endif + } + #ifdef ASSERT if (VerifyConnectionGraph) { // Verify that graph is complete - no new edges could be added or needed. verify_connection_graph(ptnodes_worklist, non_escaped_worklist, java_objects_worklist, addp_worklist); @@ -268,18 +317,18 @@ _collecting = false; } // TracePhase t3("connectionGraph") - // 4. Optimize ideal graph based on EA information. + // 5. Optimize ideal graph based on EA information. bool has_non_escaping_obj = (non_escaped_worklist.length() > 0); if (has_non_escaping_obj) { optimize_ideal_graph(ptr_cmp_worklist, storestore_worklist); } #ifndef PRODUCT - if (PrintEscapeAnalysis) { + if (print_escape_analysis()) { dump(ptnodes_worklist); // Dump ConnectionGraph } #endif bool has_scalar_replaceable_candidates = (alloc_worklist.length() > 0); @@ -292,21 +341,21 @@ assert(ptn->escape_state() == PointsToNode::NoEscape && ptn->scalar_replaceable(), "sanity"); } } #endif - // 5. Separate memory graph for scalar replaceable allcations. + // 6. Separate memory graph for scalar replaceable allcations. if (has_scalar_replaceable_candidates && C->AliasLevel() >= 3 && EliminateAllocations) { // Now use the escape information to create unique types for // scalar replaceable objects. split_unique_types(alloc_worklist, arraycopy_worklist); if (C->failing()) return false; C->print_method(PHASE_AFTER_EA, 2); #ifdef ASSERT - } else if (Verbose && (PrintEscapeAnalysis || PrintEliminateAllocations)) { + } else if (Verbose && (print_escape_analysis() || print_eliminate_allocations())) { tty->print("=== No allocations eliminated for "); C->method()->print_short_name(); if(!EliminateAllocations) { tty->print(" since EliminateAllocations is off ==="); } else if(!has_scalar_replaceable_candidates) { @@ -318,10 +367,274 @@ #endif } return has_non_escaping_obj; } +// If an allocation is dominated by a loop, check to see if the lifetime of two instances +// may overlap. If they do this allocate is not eligible for stack allocation +bool ConnectionGraph::allocation_lifetime_overlap(AllocateNode *alloc, PhiNode *phi) { + Node *child0 = phi->in(0); + if (!child0->is_Loop()) { + return false; + } + // This is very pessimistic... but correct. 
It could be optimized + VectorSet visited(Thread::current()->resource_area()); + GrowableArray node_worklist; + + for (uint i = 1; i < phi->outcnt(); i++) { + node_worklist.push(phi->raw_out(i)); + } + + while(node_worklist.length() != 0) { + Node* node = node_worklist.pop(); + if (visited.test_set(node->_idx)) { + continue; // already processed + } + + if (node->is_Phi()) { + if (phi == node) { + return true; + } + } + for (DUIterator_Fast imax, i = node->fast_outs(imax); i < imax; i++) { + node_worklist.push(node->fast_out(i)); + } + } + return false; +} + +// Find if an allocate result may reach an EncodeP +bool ConnectionGraph::oop_may_be_compressed(Node* alloc_result) { + VectorSet visited(Thread::current()->resource_area()); + GrowableArray node_worklist; + + node_worklist.push(alloc_result); + visited.set(alloc_result->_idx); + + while(node_worklist.length() != 0) { + Node* node = node_worklist.pop(); + + for (DUIterator_Fast imax, i = node->fast_outs(imax); i < imax; i++) { + Node *use = node->fast_out(i); + if (use->is_Phi()) { + if (!visited.test_set(use->_idx)) { + node_worklist.push(use); + } + } else if (use->is_EncodeP()) { + return true; + } + } + } + + return false; +} + +// Various checks to determine if an alloc is a candidate for stack allocation +bool ConnectionGraph::eligible_for_stack_allocation(PointsToNode* ptn) { + assert(ptn->ideal_node()->is_Allocate(), "Must be called on allocate or allocate array node"); + + AllocateNode *alloc = ptn->ideal_node()->as_Allocate(); + Node* res = alloc->result_cast(); + if (res == NULL) { +#ifndef PRODUCT + if (print_escape_analysis() || print_stack_allocation()) { + tty->print_cr("---- Alloc node %d can not be stack allocated due to NULL result_cast", alloc->_idx); + } +#endif + return false; + } else if (!res->is_CheckCastPP()) { +#ifndef PRODUCT + if (print_escape_analysis() || print_stack_allocation()) { + tty->print_cr("---- Alloc node %d can not be stack allocated due to an invalid result_cast", alloc->_idx); + } +#endif + return false; + } + + Node* size_in_bytes = alloc->in(AllocateNode::AllocSize); + intptr_t size_of_object = _igvn->find_intptr_t_con(size_in_bytes, -1); + if ((size_of_object == -1) || (size_of_object > AllocateNode::StackAllocSizeLimit)) { + // Object has unknown size or is too big so it can not be stack allocated. 
+ // No need to find reaching objects since it does not have any fields +#ifndef PRODUCT + if (print_escape_analysis() || print_stack_allocation()) { + tty->print_cr("---- Alloc node %d can not be stack allocated due to an invalid size", alloc->_idx); + } +#endif + return false; + } + + if (alloc->is_AllocateArray()) { + int length = alloc->in(AllocateNode::ALength)->find_int_con(-1); + if (length < 0 || length > EliminateAllocationArraySizeLimit) { + // Array does not have a constant length so it can not be stack allocated +#ifndef PRODUCT + if (print_escape_analysis() || print_stack_allocation()) { + tty->print_cr("---- Alloc node %d can not be stack allocated as it is an array with an invalid length", alloc->_idx); + } +#endif + return false; + } + } + + if (UseCompressedOops && oop_may_be_compressed(res)) { +#ifndef PRODUCT + if (print_escape_analysis() || print_stack_allocation()) { + tty->print_cr("---- Alloc node %d can not be stack allocated due to compress operation on the stack oop", alloc->_idx); + } +#endif + return false; + } + + return all_uses_eligible_for_stack_allocation(ptn); +} + +// Check if the alloc has uses that make it ineligible for stack allocation +bool ConnectionGraph::all_uses_eligible_for_stack_allocation(PointsToNode *ptn) { + assert(ptn->ideal_node()->is_Allocate(), "Must be called on allocate or allocate array node"); + + AllocateNode *alloc = ptn->ideal_node()->as_Allocate(); + Node* res = alloc->result_cast(); + + assert(res != NULL, "Result cast must not be NULL at this point"); + + for (int uses = 0; uses < ptn->use_count(); uses ++) { + PointsToNode *use = ptn->use(uses); + if (use->is_LocalVar()) { + LocalVarNode *local = use->as_LocalVar(); + Node *node = local->ideal_node(); + if (node->is_Phi()) { + if (allocation_lifetime_overlap(alloc, node->as_Phi())) { +#ifndef PRODUCT + if (print_escape_analysis() || print_stack_allocation()) { + tty->print_cr("---- Alloc node %d can not be stack allocated as it may overlap with older versions of itself", alloc->_idx); + } +#endif + return false; + } + } else if (node->is_Load() && node->Opcode() == Op_LoadP) { + Node *in1 = node->in(1); + if ((in1 != NULL) && in1->is_Phi()) { + if (allocation_lifetime_overlap(alloc, in1->as_Phi())) { +#ifndef PRODUCT + if (print_escape_analysis() || print_stack_allocation()) { + tty->print_cr("---- Alloc node %d can not be stack allocated as it may overlap with older versions of itself", alloc->_idx); + } +#endif + return false; + } + } + } + } else if (use->is_Field()) { + if (UseCompressedOops) { +#ifndef PRODUCT + if (print_escape_analysis() || print_stack_allocation()) { + tty->print_cr("---- Alloc node %d can not be stack allocated as it referenced by another object", alloc->_idx); + } +#endif + return false; + } + } else if (use->is_Arraycopy()) { + if (ptn->arraycopy_dst() && alloc->is_AllocateArray()) { + Node* klass = alloc->in(AllocateNode::KlassNode); + ciKlass* k = _igvn->type(klass)->is_klassptr()->klass(); + if (k->is_obj_array_klass()) { + // The System.arraycopy helper has a post store barrier which does not handle stack allocated objects +#ifndef PRODUCT + if (print_escape_analysis() || print_stack_allocation()) { + tty->print_cr("---- Alloc node %d can not be stack allocated as it is referenced from an arraycopy", alloc->_idx); + } +#endif + return false; + } + } + } + } + + return true; +} + +bool ConnectionGraph::verify_stack_allocated_object_chains(GrowableArray &non_escaped_worklist, int non_escaped_length) { + for (int next = 0; next < 
non_escaped_length; next++) { + JavaObjectNode* ptn = non_escaped_worklist.at(next); + if (ptn->escape_state() != PointsToNode::NoEscape) { + continue; + } + Node* n = ptn->ideal_node(); + if (!n->is_Allocate()) { + continue; + } + AllocateNode *alloc = n->as_Allocate(); + if (!alloc->_is_stack_allocateable) { + continue; + } + for (int uses = 0; uses < ptn->use_count(); uses ++) { + PointsToNode *use = ptn->use(uses); + if(use->is_Field()) { + for (BaseIterator i(use->as_Field()); i.has_next(); i.next()) { + PointsToNode* base = i.get(); + if (base->is_JavaObject()) { + JavaObjectNode *new_obj = base->as_JavaObject(); + if (new_obj == ptn) { + continue; + } + if (!new_obj->ideal_node()->is_Allocate()) { + if (new_obj->ideal_node()->Opcode() == Op_ConP) { + TypeNode *tn = new_obj->ideal_node()->as_Type(); + if (tn->type() == TypePtr::NULL_PTR) { + // Allow NULL ptr ConP + continue; + } + } + alloc->_is_stack_allocateable = false; + alloc->_is_referenced_stack_allocation = false; +#ifndef PRODUCT + if (print_escape_analysis() || print_stack_allocation()) { + tty->print_cr("---- Alloc node %d can not be stack allocated, it is referenced by a non allocate object", alloc->_idx); + } +#endif + return true; + } + AllocateNode *new_alloc = new_obj->ideal_node()->as_Allocate(); + if (!new_alloc->_is_stack_allocateable && !new_obj->scalar_replaceable()) { + alloc->_is_stack_allocateable = false; + alloc->_is_referenced_stack_allocation = false; +#ifndef PRODUCT + if (print_escape_analysis() || print_stack_allocation()) { + tty->print_cr("---- Alloc node %d can not be stack allocated, it is referenced by another non SCR/SA object %d", alloc->_idx, new_alloc->_idx); + } +#endif + return true; + } else { + assert(alloc->_is_stack_allocateable, "has to be stack allocateable"); + alloc->_is_referenced_stack_allocation = true; + } + } + } + } + } + } + + return false; +} + +#ifndef PRODUCT +void ConnectionGraph::print_stack_allocated_candidates(GrowableArray &non_escaped_worklist, int non_escaped_length) { + for (int next = 0; next < non_escaped_length; next++) { + JavaObjectNode* ptn = non_escaped_worklist.at(next); + Node* n = ptn->ideal_node(); + if (!n->is_Allocate()) { + continue; + } + AllocateNode *alloc = n->as_Allocate(); + if (alloc->_is_stack_allocateable) { + tty->print_cr("++++ Alloc node %d is marked as stack allocateable is_scalar_replaceable (%d)", n->_idx, ptn->scalar_replaceable()); + } + } +} +#endif + // Utility function for nodes that load an object void ConnectionGraph::add_objload_to_connection_graph(Node *n, Unique_Node_List *delayed_worklist) { // Using isa_ptr() instead of isa_oopptr() for LoadP and Phi because // ThreadLocal has RawPtr type. const Type* t = _igvn->type(n); @@ -1235,11 +1548,11 @@ // Possible infinite build_connection_graph loop, // bailout (no changes to ideal graph were made). 
return false; } #ifdef ASSERT - if (Verbose && PrintEscapeAnalysis) { + if (Verbose && print_escape_analysis()) { tty->print_cr("EA: %d iterations to build connection graph with %d nodes and worklist size %d", iterations, nodes_size(), ptnodes_worklist.length()); } #endif @@ -2780,11 +3093,14 @@ result = un; } else { break; } } else if (result->is_ClearArray()) { - if (!ClearArrayNode::step_through(&result, (uint)toop->instance_id(), igvn)) { + intptr_t offset; + AllocateNode* alloc = AllocateNode::Ideal_allocation(result->in(3), igvn, offset); + + if ((alloc == NULL) || !ClearArrayNode::step_through(&result, (uint)toop->instance_id(), igvn)) { // Can not bypass initialization of the instance // we are looking for. break; } // Otherwise skip it (the call updated 'result' value). diff a/src/hotspot/share/opto/escape.hpp b/src/hotspot/share/opto/escape.hpp --- a/src/hotspot/share/opto/escape.hpp +++ b/src/hotspot/share/opto/escape.hpp @@ -331,10 +331,12 @@ // is still being collected. If false, // no new nodes will be processed. bool _verify; // verify graph + bool _has_locks; // Used by stack allocation + JavaObjectNode* null_obj; Node* _pcmp_neq; // ConI(#CC_GT) Node* _pcmp_eq; // ConI(#CC_EQ) Compile* _compile; // Compile object for current compilation @@ -598,12 +600,42 @@ } void add_to_congraph_unsafe_access(Node* n, uint opcode, Unique_Node_List* delayed_worklist); bool add_final_edges_unsafe_access(Node* n, uint opcode); + // Helpers for stack allocation + + // If an allocation is dominated by a loop, check to see if the lifetimes of two instances + // may overlap. If they do, this allocation is not eligible for stack allocation + bool allocation_lifetime_overlap(AllocateNode *alloc, PhiNode *phi); + // Stack allocation has limited support for compressed references at the moment. + // This helper checks if an oop may be compressed at some point in the graph. + bool oop_may_be_compressed(Node* alloc_result); + // Check if the alloc node is eligible for stack allocation + bool eligible_for_stack_allocation(PointsToNode* ptn); + // Check if the alloc has uses that make it ineligible for stack allocation + bool all_uses_eligible_for_stack_allocation(PointsToNode *ptn); + // Verify object chains for stack allocated objects. Heap objects cannot point to stack allocated objects.
+ bool verify_stack_allocated_object_chains(GrowableArray &non_escaped_worklist, int non_escaped_length); +#ifndef PRODUCT + void print_stack_allocated_candidates(GrowableArray &non_escaped_worklist, int non_escaped_length); +#endif + #ifndef PRODUCT void dump(GrowableArray& ptnodes_worklist); + + bool print_escape_analysis() { + return PrintEscapeAnalysis || _compile->directive()->PrintEscapeAnalysisOption; + } + + bool print_eliminate_allocations() { + return PrintEliminateAllocations || _compile->directive()->PrintEliminateAllocationsOption; + } + + bool print_stack_allocation() { + return PrintStackAllocation || _compile->directive()->PrintStackAllocationOption; + } #endif }; inline PointsToNode::PointsToNode(ConnectionGraph *CG, Node* n, EscapeState es, NodeType type): _edges(CG->_compile->comp_arena(), 2, 0, NULL), diff a/src/hotspot/share/opto/idealKit.cpp b/src/hotspot/share/opto/idealKit.cpp --- a/src/hotspot/share/opto/idealKit.cpp +++ b/src/hotspot/share/opto/idealKit.cpp @@ -62,10 +62,34 @@ set_all_memory(gkit->merged_memory()); set_i_o(gkit->i_o()); set_ctrl(gkit->control()); } +//-------------------------------uif_then------------------------------------- +// Create: unsigned if(left relop right) +// / \ +// iffalse iftrue +// Push the iffalse cvstate onto the stack. The iftrue becomes the current cvstate. +void IdealKit::uif_then(Node* left, BoolTest::mask relop, + Node* right, float prob, float cnt, bool push_new_state) { + assert((state() & (BlockS|LoopS|IfThenS|ElseS)), "bad state for new If"); + Node* bol; + if (left->bottom_type()->isa_ptr() == NULL) { + if (left->bottom_type()->isa_int() != NULL) { + bol = Bool(CmpU(left, right), relop); + } else { + assert(left->bottom_type()->isa_long() != NULL, "what else?"); + bol = Bool(CmpUL(left, right), relop); + } + + } else { + bol = Bool(CmpP(left, right), relop); + } + + if_then_common(bol, prob, cnt, push_new_state); +} + //-------------------------------if_then------------------------------------- // Create: if(left relop right) // / \ // iffalse iftrue // Push the iffalse cvstate onto the stack. The iftrue becomes the current cvstate. @@ -82,10 +106,17 @@ } } else { bol = Bool(CmpP(left, right), relop); } + + if_then_common(bol, prob, cnt, push_new_state); +} + +// Common helper to create the If nodes for if_then and uif_then +void IdealKit::if_then_common(Node* bol, float prob, float cnt, + bool push_new_state) { // Delay gvn.tranform on if-nodes until construction is finished // to prevent a constant bool input from discarding a control output. IfNode* iff = delay_transform(new IfNode(ctrl(), bol, prob, cnt))->as_If(); Node* then = IfTrue(iff); Node* elsen = IfFalse(iff); diff a/src/hotspot/share/opto/idealKit.hpp b/src/hotspot/share/opto/idealKit.hpp --- a/src/hotspot/share/opto/idealKit.hpp +++ b/src/hotspot/share/opto/idealKit.hpp @@ -125,10 +125,14 @@ Node* promote_to_phi(Node* n, Node* reg);// Promote "n" to a phi on region "reg" bool was_promoted_to_phi(Node* n, Node* reg) { return (n->is_Phi() && n->in(0) == reg); } void declare(IdealVariable* v) { v->set_id(_var_ct++); } + + void if_then_common(Node* bol, float prob = PROB_FAIR, float cnt = COUNT_UNKNOWN, + bool push_new_state = true); + // This declares the position where vars are kept in the cvstate // For some degree of consistency we use the TypeFunc enum to // soak up spots in the inputs even though we only use early Control // and Memory slots. (So far.) 
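The uif_then/CmpU/CmpUL additions above give IdealKit an unsigned counterpart to if_then; the usual payoff is that a two-sided signed range check collapses into a single unsigned compare. A source-level sketch of the idiom such an unsigned test enables (illustrative only, not part of the patch):

// With length >= 0 (as for array lengths), a negative index wraps to a huge
// unsigned value, so one unsigned compare covers both "index < 0" and
// "index >= length".
static bool in_bounds(int index, int length) {
  return static_cast<unsigned int>(index) < static_cast<unsigned int>(length);
}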
static const uint first_var; // = TypeFunc::Parms + 1; @@ -161,10 +165,13 @@ Node* value(IdealVariable& v) { return _cvstate->in(first_var + v.id()); } void dead(IdealVariable& v) { set(v, (Node*)NULL); } void if_then(Node* left, BoolTest::mask relop, Node* right, float prob = PROB_FAIR, float cnt = COUNT_UNKNOWN, bool push_new_state = true); + void uif_then(Node* left, BoolTest::mask relop, Node* right, + float prob = PROB_FAIR, float cnt = COUNT_UNKNOWN, + bool push_new_state = true); void else_(); void end_if(); void loop(GraphKit* gkit, int nargs, IdealVariable& iv, Node* init, BoolTest::mask cmp, Node* limit, float prob = PROB_LIKELY(0.9), float cnt = COUNT_UNKNOWN); void end_loop(); @@ -180,20 +187,23 @@ Node* ConI(jint k) { return (Node*)gvn().intcon(k); } Node* makecon(const Type *t) const { return _gvn.makecon(t); } Node* AddI(Node* l, Node* r) { return transform(new AddINode(l, r)); } Node* SubI(Node* l, Node* r) { return transform(new SubINode(l, r)); } + Node* SubL(Node* l, Node* r) { return transform(new SubLNode(l, r)); } Node* AndI(Node* l, Node* r) { return transform(new AndINode(l, r)); } Node* OrI(Node* l, Node* r) { return transform(new OrINode(l, r)); } Node* MaxI(Node* l, Node* r) { return transform(new MaxINode(l, r)); } Node* LShiftI(Node* l, Node* r) { return transform(new LShiftINode(l, r)); } Node* CmpI(Node* l, Node* r) { return transform(new CmpINode(l, r)); } + Node* CmpU(Node* l, Node* r) { return transform(new CmpUNode(l, r)); } Node* Bool(Node* cmp, BoolTest::mask relop) { return transform(new BoolNode(cmp, relop)); } void increment(IdealVariable& v, Node* j) { set(v, AddI(value(v), j)); } void decrement(IdealVariable& v, Node* j) { set(v, SubI(value(v), j)); } Node* CmpL(Node* l, Node* r) { return transform(new CmpLNode(l, r)); } + Node* CmpUL(Node* l, Node* r) { return transform(new CmpULNode(l, r)); } // TLS Node* thread() { return gvn().transform(new ThreadLocalNode()); } // Pointers diff a/src/hotspot/share/opto/loopnode.cpp b/src/hotspot/share/opto/loopnode.cpp --- a/src/hotspot/share/opto/loopnode.cpp +++ b/src/hotspot/share/opto/loopnode.cpp @@ -3889,11 +3889,11 @@ (n->in(0)->Opcode() == Op_IfFalse && (1.0 - iff->as_If()->_prob) >= 0.01) || (iff->as_If()->_prob >= 0.01) ) innermost->_has_call = 1; } - } else if( n->is_Allocate() && n->as_Allocate()->_is_scalar_replaceable ) { + } else if( n->is_Allocate() && (n->as_Allocate()->_is_scalar_replaceable || n->as_Allocate()->_is_stack_allocateable) ) { // Disable loop optimizations if the loop has a scalar replaceable // allocation. This disabling may cause a potential performance lost // if the allocation is not eliminated for some reason. 
innermost->_allow_optimizations = false; innermost->_has_call = 1; // = true diff a/src/hotspot/share/opto/machnode.hpp b/src/hotspot/share/opto/machnode.hpp --- a/src/hotspot/share/opto/machnode.hpp +++ b/src/hotspot/share/opto/machnode.hpp @@ -844,11 +844,15 @@ return in(_jvmadj + jvms->locoff() + idx); } Node *stack(const JVMState* jvms, uint idx) const { assert(verify_jvms(jvms), "jvms must match"); return in(_jvmadj + jvms->stkoff() + idx); - } + } + Node *scalar(const JVMState* jvms, uint idx) const { + assert(verify_jvms(jvms), "jvms must match"); + return in(_jvmadj + jvms->scloff() + idx); + } Node *monitor_obj(const JVMState* jvms, uint idx) const { assert(verify_jvms(jvms), "jvms must match"); return in(_jvmadj + jvms->monitor_obj_offset(idx)); } Node *monitor_box(const JVMState* jvms, uint idx) const { diff a/src/hotspot/share/opto/macro.cpp b/src/hotspot/share/opto/macro.cpp --- a/src/hotspot/share/opto/macro.cpp +++ b/src/hotspot/share/opto/macro.cpp @@ -301,10 +301,17 @@ } else { assert(adr_idx == Compile::AliasIdxRaw, "address must match or be raw"); } mem = mem->in(MemNode::Memory); } else if (mem->is_ClearArray()) { + intptr_t offset; + AllocateNode* alloc = AllocateNode::Ideal_allocation(mem->in(3), phase, offset); + + if (alloc == NULL) { + return start_mem; + } + if (!ClearArrayNode::step_through(&mem, alloc->_idx, phase)) { // Can not bypass initialization of the instance // we are looking. debug_only(intptr_t offset;) assert(alloc == AllocateNode::Ideal_allocation(mem->in(3), phase, offset), "sanity"); @@ -725,11 +732,11 @@ } } } #ifndef PRODUCT - if (PrintEliminateAllocations) { + if (print_eliminate_allocations()) { if (can_eliminate) { tty->print("Scalar "); if (res == NULL) alloc->dump(); else @@ -750,10 +757,22 @@ } #endif return can_eliminate; } +void PhaseMacroExpand::adjust_safepoint_jvms(SafePointNode* sfpt, Node* res, SafePointScalarObjectNode* sobj) { + JVMState *jvms = sfpt->jvms(); + jvms->set_endoff(sfpt->req()); + + // Now make a pass over the debug information replacing any references + // to the allocated object with "sobj" + int start = jvms->debug_start(); + int end = jvms->debug_end(); + sfpt->replace_edges_in_range(res, sobj, start, end); + _igvn._worklist.push(sfpt); +} + // Do scalar replacement. 
bool PhaseMacroExpand::scalar_replacement(AllocateNode *alloc, GrowableArray & safepoints) { GrowableArray safepoints_done; ciKlass* klass = NULL; @@ -882,11 +901,11 @@ } } _igvn._worklist.push(sfpt_done); } #ifndef PRODUCT - if (PrintEliminateAllocations) { + if (print_eliminate_allocations()) { if (field != NULL) { tty->print("=== At SafePoint node %d can't find value of Field: ", sfpt->_idx); field->print(); int field_idx = C->get_alias_index(field_addr_type); @@ -913,18 +932,11 @@ field_val = transform_later(new DecodeNNode(field_val, field_val->get_ptr_type())); } } sfpt->add_req(field_val); } - JVMState *jvms = sfpt->jvms(); - jvms->set_endoff(sfpt->req()); - // Now make a pass over the debug information replacing any references - // to the allocated object with "sobj" - int start = jvms->debug_start(); - int end = jvms->debug_end(); - sfpt->replace_edges_in_range(res, sobj, start, end); - _igvn._worklist.push(sfpt); + adjust_safepoint_jvms(sfpt, res, sobj); safepoints_done.append_if_missing(sfpt); // keep it for rollback } return true; } @@ -1016,10 +1028,14 @@ } assert(res->outcnt() == 0, "all uses of allocated objects must be deleted"); _igvn.remove_dead_node(res); } + eliminate_unused_allocation_edges(alloc); +} + +void PhaseMacroExpand::eliminate_unused_allocation_edges(CallNode* alloc) { // // Process other users of allocation's projections // if (_resproj != NULL && _resproj->outcnt() != 0) { // First disconnect stores captured by Initialize node. @@ -1084,10 +1100,492 @@ if (_catchallcatchproj != NULL) { _igvn.replace_node(_catchallcatchproj, C->top()); } } +#define STACK_REG_BUFFER 4 + +bool PhaseMacroExpand::stack_allocation_location_representable(int slot_location) { + // TODO This is likely not enough as there are values on the stack above the fixed slots + // Revist to see if there is a better check + OptoReg::Name stack_reg = OptoReg::stack2reg(slot_location + STACK_REG_BUFFER); + if (RegMask::can_represent(stack_reg)) { + return true; + } else { + return false; + } +} + +#undef STACK_REG_BUFFER + +int PhaseMacroExpand::next_stack_allocated_object(int num_slots) { + int current = C->fixed_slots(); + int next = current + num_slots; + if (!stack_allocation_location_representable(next)) { + return -1; + } + // Keep the toplevel high water mark current: + if (C->fixed_slots() < next) C->set_fixed_slots(next); + return current; +} + +bool PhaseMacroExpand::process_write_barriers_on_stack_allocated_objects(AllocateNode* alloc) { + GrowableArray barriers; + Node *res = alloc->result_cast(); + assert(res != NULL, "result node must not be null"); + + // Find direct barriers on the stack allocated objects. + // Those we can simply eliminate. + for (DUIterator_Fast imax, i = res->fast_outs(imax); i < imax; i++) { + Node *use = res->fast_out(i); + if (use->Opcode() == Op_CastP2X) { + barriers.append_if_missing(use); + } else if (use->is_AddP()) { + for (DUIterator_Fast jmax, j = use->fast_outs(jmax); j < jmax; j++) { + Node *addp_out = use->fast_out(j); + if (addp_out->Opcode() == Op_CastP2X) { + barriers.append_if_missing(addp_out); + } + } + } + } + + while (barriers.length() != 0) { + eliminate_gc_barrier(barriers.pop()); + } + + // After removing the direct barriers result may no longer be used + if (alloc->result_cast() == NULL) { + return true; + } + + // Next walk all uses of the allocate to discover the barriers that + // might be reachable from our allocate. 
If the barrier is reachable + // from stack allocated object, we unregister it, so that the check + // elimination code doesn't run on it. + VectorSet visited(Thread::current()->resource_area()); + GrowableArray node_worklist; + + BarrierSetC2 *bs = BarrierSet::barrier_set()->barrier_set_c2(); + + node_worklist.push(res); + + while(node_worklist.length() != 0) { + Node* n = node_worklist.pop(); + + if (visited.test_set(n->_idx)) { + continue; // already processed + } + + for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) { + Node *use = n->fast_out(i); + if (use->Opcode() == Op_CastP2X) { + bs->unregister_potential_barrier_node(use); + } else if (use->is_Phi() || + use->is_CheckCastPP() || + use->is_EncodeP() || + use->is_DecodeN() || + use->is_SafePoint() || + use->is_Proj() || + (use->is_ConstraintCast() && use->Opcode() == Op_CastPP)) { + // Find barriers beyond our current result + node_worklist.push(use); + } else if (use->is_Store() && use->Opcode() == Op_StoreP) { + if (n != use->in(MemNode::ValueIn)) { + continue; + } + // TODO code copied from escape.cpp::ConnectionGraph::get_addp_base. + // Common up this code into a helper + Node *memory = use->in(MemNode::Address); + if (memory->is_AddP()) { + Node *base = memory->in(AddPNode::Base); + if (base->uncast()->is_top()) { // The AddP case #3 and #6 and #9. + base = memory->in(AddPNode::Address); + while (base->is_AddP()) { + // Case #6 (unsafe access) may have several chained AddP nodes. + assert(base->in(AddPNode::Base)->uncast()->is_top(), "expected unsafe access address only"); + base = base->in(AddPNode::Address); + } + if (base->Opcode() == Op_CheckCastPP && + base->bottom_type()->isa_rawptr() && + _igvn.type(base->in(1))->isa_oopptr()) { + base = base->in(1); // Case #9 + } + } + node_worklist.push(base); + } + } else if (use->is_AddP() || + (use->is_Load() && use->Opcode() == Op_LoadP)) { + // Find barriers for loads + node_worklist.push(use); + } + } + } + return false; +} + +bool PhaseMacroExpand::register_stack_allocated_object_with_safepoints(AllocateNode* alloc, Node* stack_oop) { + VectorSet visited(Thread::current()->resource_area()); + GrowableArray node_worklist; + GrowableArray temp; + Dict* safepoint_map = new Dict(cmpkey, hashkey); + bool found_non_direct_safepoint = false; + Node *res = alloc->result_cast(); + + assert(res != NULL, "result node must not be null"); + + node_worklist.push(res); + + while(node_worklist.length() != 0) { + Node* n = node_worklist.pop(); + + if (visited.test_set(n->_idx)) { + continue; // already processed + } + + for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) { + Node *use = n->fast_out(i); + if (use->is_SafePoint()) { + SafePointNode* sfpt = use->as_SafePoint(); + if (sfpt->jvms() != NULL) { + temp.push(sfpt); + } + } else if (use->is_Phi() || + use->is_CheckCastPP() || + use->is_EncodeP() || + use->is_DecodeN() || + use->is_Proj() || + (use->Opcode() == Op_CastP2X) || + use->is_MergeMem() || + use->is_MemBar() || + (use->is_ConstraintCast() && use->Opcode() == Op_CastPP)) { + // Find safepoints beyond our current result + node_worklist.push(use); + } else if (use->is_Store() && use->Opcode() == Op_StoreP) { + node_worklist.push(use); + if (n != use->in(MemNode::ValueIn)) { + continue; + } + // TODO code copied from escape.cpp::ConnectionGraph::get_addp_base. 
+ // Common up this code into a helper + Node *memory = use->in(MemNode::Address); + if (memory->is_AddP()) { + Node *base = memory->in(AddPNode::Base); + if (base->uncast()->is_top()) { // The AddP case #3 and #6 and #9. + base = memory->in(AddPNode::Address); + while (base->is_AddP()) { + // Case #6 (unsafe access) may have several chained AddP nodes. + assert(base->in(AddPNode::Base)->uncast()->is_top(), "expected unsafe access address only"); + base = base->in(AddPNode::Address); + } + if (base->Opcode() == Op_CheckCastPP && + base->bottom_type()->isa_rawptr() && + _igvn.type(base->in(1))->isa_oopptr()) { + base = base->in(1); // Case #9 + } + } + node_worklist.push(base); + } + } else if (use->is_AddP() || + (use->is_Load() && use->Opcode() == Op_LoadP)) { + // Find safepoints for arrays + node_worklist.push(use); + } + } + + while (temp.length() != 0) { + SafePointNode* sfpt = temp.pop(); + if (res != n) { + found_non_direct_safepoint = true; + } + handle_safepoint_for_stack_allocation(safepoint_map, alloc, stack_oop, n, sfpt); + } + } + + return found_non_direct_safepoint; +} + +void PhaseMacroExpand::handle_safepoint_for_stack_allocation(Dict* safepoint_map, AllocateNode* alloc, Node* oop_node, Node* parent, SafePointNode* sfpt) { + Node* res = alloc->result_cast(); + assert(res->is_CheckCastPP(), "unexpected AllocateNode result"); + const TypeOopPtr* res_type = _igvn.type(res)->isa_oopptr(); + ciKlass* klass = res_type->klass(); + int nfields = 0; + if (res_type->isa_instptr()) { + // find the fields of the class which will be needed for safepoint debug information + assert(klass->is_instance_klass(), "must be an instance klass."); + ciInstanceKlass* iklass = klass->as_instance_klass(); + nfields = iklass->nof_nonstatic_fields(); + } else { + // find the array's elements which will be needed for safepoint debug information + nfields = alloc->in(AllocateNode::ALength)->find_int_con(-1); + } + + assert(nfields >= 0, "Sanity"); + + SafePointScalarObjectNode* sobj = NULL; + Node *result = (Node *)(*safepoint_map)[sfpt]; + if (result != NULL) { + assert(result->is_SafePointScalarObject(), "Has to be a safepointscalarobject"); + sobj = result->as_SafePointScalarObject(); + } else { + // + // Process the safepoint uses + // + Node* mem = sfpt->memory(); + Node* ctl = sfpt->control(); + assert(sfpt->jvms() != NULL, "missed JVMS"); + // Fields of scalar objs are referenced only at the end + // of regular debuginfo at the last (youngest) JVMS. + // Record relative start index. 
+ uint first_ind = (sfpt->req() - sfpt->jvms()->scloff()); + sobj = new SafePointScalarObjectNode(res_type, +#ifdef ASSERT + alloc, +#endif + first_ind, nfields); + sobj->init_req(0, C->root()); + sobj->add_req(oop_node); + transform_later(sobj); + sobj->set_stack_allocated(true); + + JVMState *jvms = sfpt->jvms(); + sfpt->add_req(sobj); + jvms->set_endoff(sfpt->req()); + _igvn._worklist.push(sfpt); + safepoint_map->Insert(sfpt, sobj); + } + + if (parent == res) { + adjust_safepoint_jvms(sfpt, parent, sobj); + } +} + +bool PhaseMacroExpand::can_stack_allocate(AllocateNode* alloc, Node* res, intptr_t size_of_object) { + return ((res != NULL) && alloc->_is_stack_allocateable && (size_of_object != -1) && should_stack_allocate()); +} + +void PhaseMacroExpand::estimate_stack_allocation_size(AllocateNode* alloc) { + Node* res = alloc->result_cast(); + Node* size_in_bytes = alloc->in(AllocateNode::AllocSize); + intptr_t size_of_object = _igvn.find_intptr_t_con(size_in_bytes, -1); + + if (alloc->_is_scalar_replaceable && !alloc->_is_stack_allocateable) { + C->set_fail_stack_allocation_with_references(true); + return; + } + + bool can_sa = can_stack_allocate(alloc, res, size_of_object); + if (alloc->_is_stack_allocateable && !can_sa) { + // If we marked the object as SA in EA and now we can not fail + C->set_fail_stack_allocation_with_references(true); + return; + } + + if (!alloc->_is_stack_allocateable) { + // If we can not SA because EA said no then no need to count the size + return; + } + + int current = C->stack_allocated_slots(); + C->set_stack_allocated_slots(current + (size_of_object >> LogBytesPerInt)); +} + +// Do stack allocation +bool PhaseMacroExpand::stack_allocation(AllocateNode* alloc) { + Node* klass = alloc->in(AllocateNode::KlassNode); + const TypeKlassPtr* tklass = _igvn.type(klass)->is_klassptr(); + Node *length = (alloc->is_AllocateArray()) ? 
alloc->in(AllocateNode::ALength) : NULL; + Node* size_in_bytes = alloc->in(AllocateNode::AllocSize); + Node* res = alloc->result_cast(); + Node* ctrl = alloc->in(TypeFunc::Control); + Node* mem = alloc->in(TypeFunc::Memory); + + intptr_t size_of_object = _igvn.find_intptr_t_con(size_in_bytes, -1); + + if (!can_stack_allocate(alloc, res, size_of_object)) { + return false; + } + + if (C->fail_stack_allocation_with_references()) { + if (alloc->_is_referenced_stack_allocation) { +#ifndef PRODUCT + if (print_stack_allocation()) { + tty->print_cr("---- Avoiding stack allocation on node %d because it is referenced by another alloc and SCR/SA failed in method %s", alloc->_idx, _igvn.C->method()->get_Method()->name_and_sig_as_C_string()); + } +#endif + return false; + } + } + + int next_stack_allocation_slot = next_stack_allocated_object(size_of_object >> LogBytesPerInt); + if (next_stack_allocation_slot < 0) { +#ifndef PRODUCT + if (print_stack_allocation()) { + tty->print_cr("---- Avoiding stack allocation on node %d with size %ld for method %s because of insufficient stack space", alloc->_idx, size_of_object, _igvn.C->method()->get_Method()->name_and_sig_as_C_string()); + } +#endif + return false; + } + + if (mem->is_MergeMem()) { + mem = mem->as_MergeMem()->memory_at(Compile::AliasIdxRaw); + } + + extract_call_projections(alloc); + + // Process barriers as this may result in result_cast() becoming NULL + if (process_write_barriers_on_stack_allocated_objects(alloc)) { +#ifndef PRODUCT + if (print_stack_allocation()) { + tty->print_cr("---- Allocation %d result_cast is no longer used so yank the alloc instead", alloc->_idx); + } +#endif + InitializeNode* init = alloc->initialization(); + if (init != NULL) { + init->remove(&_igvn); + } + yank_alloc_node(alloc); + return true; + } + + assert(res == alloc->result_cast(), "values much match"); + + Node* stack_oop = transform_later(new BoxLockNode(next_stack_allocation_slot)); + Node* new_raw_mem = initialize_object(alloc, ctrl, mem, stack_oop, klass, length, size_in_bytes); + + bool non_direct_safepoints = register_stack_allocated_object_with_safepoints(alloc, stack_oop); + if (non_direct_safepoints) { + if (length != NULL) { + stack_allocation_init_array_length_on_entry(alloc, length, stack_oop); + } +#ifndef PRODUCT + stack_allocation_clear_object_data(alloc, stack_oop); +#endif + } + + _igvn.replace_node(_resproj, stack_oop); + + for (DUIterator_Fast imax, i = _memproj_fallthrough->fast_outs(imax); i < imax; i++) { + Node *use = _memproj_fallthrough->fast_out(i); + _igvn.rehash_node_delayed(use); + imax -= replace_input(use, _memproj_fallthrough, new_raw_mem); + // back up iterator + --i; + } + + eliminate_unused_allocation_edges(alloc); + + assert(_resproj->outcnt() == 0, "all uses of the original allocate result projection must be deleted"); + _igvn.remove_dead_node(_resproj); + +#ifndef PRODUCT + if (print_stack_allocation()) { + tty->print_cr("++++ Performing stack allocation on node %d with size %ld for method %s", alloc->_idx, size_of_object, _igvn.C->method()->get_Method()->name_and_sig_as_C_string()); + } +#endif + + return true; +} + +/* + Initialize stack allocated array length on entry to the method. + This is required for de-opt so it can verify array lengths and so + that GCs that happen after deopt will not crash for uninitialized + arrays. 
+*/ +void PhaseMacroExpand::stack_allocation_init_array_length_on_entry(AllocateNode *alloc, Node *length, Node *stack_oop) { + Node* start_mem = C->start()->proj_out_or_null(TypeFunc::Memory); + assert(length != NULL, "Length can not be NULL"); + + if (C->is_osr_compilation()) { + for (DUIterator_Fast imax, i = start_mem->fast_outs(imax); i < imax; i++) { + Node *child = start_mem->fast_out(i); + if (child->is_CallLeaf() && child->as_CallLeaf()->is_call_to_osr_migration_end()) { + CallLeafNode* call_leaf = child->as_CallLeaf(); + start_mem = call_leaf->proj_out_or_null(TypeFunc::Memory); + break; + } + } + } + assert(start_mem != NULL, "Must find start mem"); + Node* init_mem = start_mem; + + // need to set the length field for arrays for deopt + init_mem = make_store(C->start()->proj_out_or_null(TypeFunc::Control), + init_mem, stack_oop, arrayOopDesc::length_offset_in_bytes(), + length, T_INT); + + + if (init_mem != start_mem) { + for (DUIterator_Fast imax, i = start_mem->fast_outs(imax); i < imax; i++) { + Node *use = start_mem->fast_out(i); + // Compressed refs can make a new store which adjusts the start + // offet and it's sourced by start_mem. Make sure we don't make cycle. + if (use == init_mem || (init_mem->find_edge(use) >= 0)) { + continue; + } + _igvn.rehash_node_delayed(use); + imax -= replace_input(use, start_mem, init_mem); + // back up iterator + --i; + } + } +} + +#ifndef PRODUCT +/* + Initialize SA object on entry to the method to ensure it is initialized + before safepoints which may only be reachable through phis and the object + may not actually have been initialized. +*/ +void PhaseMacroExpand::stack_allocation_clear_object_data(AllocateNode *alloc, Node *stack_oop) { + Node* klass = alloc->in(AllocateNode::KlassNode); + Node *length = (alloc->is_AllocateArray()) ? alloc->in(AllocateNode::ALength) : NULL; + Node* size_in_bytes = alloc->in(AllocateNode::AllocSize); + Node* start_mem = C->start()->proj_out_or_null(TypeFunc::Memory); + if (C->is_osr_compilation()) { + for (DUIterator_Fast imax, i = start_mem->fast_outs(imax); i < imax; i++) { + Node *child = start_mem->fast_out(i); + if (child->is_CallLeaf() && child->as_CallLeaf()->is_call_to_osr_migration_end()) { + CallLeafNode* call_leaf = child->as_CallLeaf(); + start_mem = call_leaf->proj_out_or_null(TypeFunc::Memory); + break; + } + } + } + assert(start_mem != NULL, "Must find start mem"); + int header_size = alloc->minimum_header_size(); + Node* init_mem = start_mem; + if (length != NULL) { + // conservatively small header size: + header_size = arrayOopDesc::base_offset_in_bytes(T_BYTE); + ciKlass* k = _igvn.type(klass)->is_klassptr()->klass(); + if (k->is_array_klass()) { // we know the exact header size in most cases: + header_size = Klass::layout_helper_header_size(k->layout_helper()); + } + } + init_mem = ClearArrayNode::clear_memory(C->start()->proj_out_or_null(TypeFunc::Control), + init_mem, stack_oop, header_size, size_in_bytes, + &_igvn); + if (init_mem != start_mem) { + for (DUIterator_Fast imax, i = start_mem->fast_outs(imax); i < imax; i++) { + Node *use = start_mem->fast_out(i); + // Compressed refs can make a new store which adjusts the start + // offet and it's sourced by start_mem. Make sure we don't make cycle. 
+ if (use == init_mem || (init_mem->find_edge(use) >= 0)) { + continue; + } + _igvn.rehash_node_delayed(use); + imax -= replace_input(use, start_mem, init_mem); + // back up iterator + --i; + } + } +} +#endif + bool PhaseMacroExpand::eliminate_allocate_node(AllocateNode *alloc) { // Don't do scalar replacement if the frame can be popped by JVMTI: // if reallocation fails during deoptimization we'll pop all // interpreter frames for this compiled frame and that won't play // nice with JVMTI popframe. @@ -1140,11 +1638,11 @@ } process_users_of_allocation(alloc); #ifndef PRODUCT - if (PrintEliminateAllocations) { + if (print_eliminate_allocations()) { if (alloc->is_AllocateArray()) tty->print_cr("++++ Eliminated: %d AllocateArray", alloc->_idx); else tty->print_cr("++++ Eliminated: %d Allocate", alloc->_idx); } @@ -1181,11 +1679,11 @@ } process_users_of_allocation(boxing); #ifndef PRODUCT - if (PrintEliminateAllocations) { + if (print_eliminate_allocations()) { tty->print("++++ Eliminated: %d ", boxing->_idx); boxing->method()->print_short_name(tty); tty->cr(); } #endif @@ -2777,10 +3275,30 @@ _igvn.optimize(); if (C->failing()) return true; _igvn.set_delay_transform(true); } + for (int i = C->macro_count(); i > 0; i --) { + Node * n = C->macro_node(i-1); + assert(n->is_macro(), "only macro nodes expected here"); + + switch (n->class_id()) { + case Node::Class_Allocate: + case Node::Class_AllocateArray: + estimate_stack_allocation_size(n->as_Allocate()); + break; + default: + assert(false, "unknown node type in macro list"); + } + } + + // Check to see if stack allocation size is too large before macro expansion + // so we can reject required stack allocations + if (!stack_allocation_location_representable(C->fixed_slots() + C->stack_allocated_slots())) { + C->set_fail_stack_allocation_with_references(true); + } + // All nodes except Allocate nodes are expanded now. There could be // new optimization opportunities (such as folding newly created // load from a just allocated object). Run IGVN. 
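The size estimate above amounts to a simple slot budget: each candidate contributes its object size in 4-byte stack slots (size_of_object >> LogBytesPerInt), and that total plus the frame's fixed slots must stay representable by the register mask (RegMask::can_represent over OptoReg::stack2reg in the code above). A standalone sketch of the accounting, with a made-up cap standing in for the representability check:

#include <stdint.h>

static const int kLogBytesPerSlot    = 2;    // 4-byte slots, like LogBytesPerInt
static const int kRepresentableSlots = 128;  // stand-in for the RegMask::can_represent() limit

static int slots_for(intptr_t size_in_bytes) {
  return (int)(size_in_bytes >> kLogBytesPerSlot);
}

// Returns false when the combined frame demand exceeds the budget; the compile then
// falls back by refusing stack allocation for cross-referenced objects, which is what
// C->set_fail_stack_allocation_with_references(true) does above.
static bool fits_on_frame(int fixed_slots, const intptr_t* object_sizes, int count) {
  int total = fixed_slots;
  for (int i = 0; i < count; i++) {
    total += slots_for(object_sizes[i]);
  }
  return total <= kRepresentableSlots;
}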
// expand "macro" nodes @@ -2800,14 +3318,18 @@ if (C->check_node_count(300, "out of nodes before macro expansion")) { return true; } switch (n->class_id()) { case Node::Class_Allocate: - expand_allocate(n->as_Allocate()); + if (!stack_allocation(n->as_Allocate())) { + expand_allocate(n->as_Allocate()); + } break; case Node::Class_AllocateArray: - expand_allocate_array(n->as_AllocateArray()); + if (!stack_allocation(n->as_AllocateArray())) { + expand_allocate_array(n->as_AllocateArray()); + } break; default: assert(false, "unknown node type in macro list"); } assert(C->macro_count() < macro_count, "must have deleted a node from macro list"); diff a/src/hotspot/share/opto/macro.hpp b/src/hotspot/share/opto/macro.hpp --- a/src/hotspot/share/opto/macro.hpp +++ b/src/hotspot/share/opto/macro.hpp @@ -102,16 +102,33 @@ address slow_call_address); void yank_initalize_node(InitializeNode* node); void yank_alloc_node(AllocateNode* alloc); Node *value_from_mem(Node *mem, Node *ctl, BasicType ft, const Type *ftype, const TypeOopPtr *adr_t, AllocateNode *alloc); Node *value_from_mem_phi(Node *mem, BasicType ft, const Type *ftype, const TypeOopPtr *adr_t, AllocateNode *alloc, Node_Stack *value_phis, int level); + const Type* field_type_from_element(ciField* field, ciType* elem_type, BasicType& basic_elem_type); + void adjust_safepoint_jvms(SafePointNode* sfpt, Node* res, SafePointScalarObjectNode* sobj); + void retrieve_type_and_size_info(AllocateNode *alloc, Node *res, const TypeOopPtr** res_type, ciKlass** klass, ciInstanceKlass** iklass, int* nfields); + void retrieve_array_type_and_size_info(ciKlass* klass, ciType** elem_type, BasicType* basic_elem_type, int* element_size, int* array_base); bool eliminate_boxing_node(CallStaticJavaNode *boxing); bool eliminate_allocate_node(AllocateNode *alloc); bool can_eliminate_allocation(AllocateNode *alloc, GrowableArray & safepoints); bool scalar_replacement(AllocateNode *alloc, GrowableArray & safepoints_done); void process_users_of_allocation(CallNode *alloc); + bool stack_allocation(AllocateNode *alloc); + void eliminate_unused_allocation_edges(CallNode* alloc); + void handle_safepoint_for_stack_allocation(Dict* safepoint_map, AllocateNode* alloc, Node* oop_node, Node* parent, SafePointNode* sfpt); + bool process_write_barriers_on_stack_allocated_objects(AllocateNode* alloc); + bool register_stack_allocated_object_with_safepoints(AllocateNode* alloc, Node* stack_oop); + void stack_allocation_init_array_length_on_entry(AllocateNode *alloc, Node *length, Node *stack_oop); +#ifndef PRODUCT + void stack_allocation_clear_object_data(AllocateNode *alloc, Node *stack_oop); +#endif + void estimate_stack_allocation_size(AllocateNode* alloc); + bool can_stack_allocate(AllocateNode* alloc, Node *res, intptr_t size_of_object); + bool stack_allocation_location_representable(int n); + int next_stack_allocated_object(int num_slots); void eliminate_gc_barrier(Node *p2x); void mark_eliminated_box(Node* box, Node* obj); void mark_eliminated_locking_nodes(AbstractLockNode *alock); bool eliminate_locking_node(AbstractLockNode *alock); @@ -201,10 +218,23 @@ Node* klass_node, Node* length, Node* size_in_bytes); Node* make_arraycopy_load(ArrayCopyNode* ac, intptr_t offset, Node* ctl, Node* mem, BasicType ft, const Type *ftype, AllocateNode *alloc); + bool should_stack_allocate() { + return C->do_stack_allocation(); + } + +#ifndef PRODUCT + bool print_eliminate_allocations() { + return PrintEliminateAllocations || C->directive()->PrintEliminateAllocationsOption; + } + bool 
print_stack_allocation() { + return PrintStackAllocation || C->directive()->PrintStackAllocationOption; + } +#endif + public: PhaseMacroExpand(PhaseIterGVN &igvn) : Phase(Macro_Expand), _igvn(igvn), _has_locks(false) { _igvn.set_delay_transform(true); } void eliminate_macro_nodes(); diff a/src/hotspot/share/opto/memnode.cpp b/src/hotspot/share/opto/memnode.cpp --- a/src/hotspot/share/opto/memnode.cpp +++ b/src/hotspot/share/opto/memnode.cpp @@ -186,11 +186,14 @@ result = proj_in->in(TypeFunc::Memory); } else { assert(false, "unexpected projection"); } } else if (result->is_ClearArray()) { - if (!is_instance || !ClearArrayNode::step_through(&result, instance_id, phase)) { + intptr_t offset; + AllocateNode* alloc = AllocateNode::Ideal_allocation(result->in(3), phase, offset); + + if (!is_instance || (alloc == NULL) || !ClearArrayNode::step_through(&result, instance_id, phase)) { // Can not bypass initialization of the instance // we are looking for. break; } // Otherwise skip it (the call updated 'result' value). @@ -706,11 +709,14 @@ break; } mem = mem->in(0)->in(TypeFunc::Memory); continue; // (a) advance through independent MemBar memory } else if (mem->is_ClearArray()) { - if (ClearArrayNode::step_through(&mem, (uint)addr_t->instance_id(), phase)) { + intptr_t offset; + AllocateNode* alloc = AllocateNode::Ideal_allocation(mem->in(3), phase, offset); + + if ((alloc != NULL) && ClearArrayNode::step_through(&mem, (uint)addr_t->instance_id(), phase)) { // (the call updated 'mem' value) continue; // (a) advance through independent allocation memory } else { // Can not bypass initialization of the instance // we are looking for. diff a/src/hotspot/share/opto/output.cpp b/src/hotspot/share/opto/output.cpp --- a/src/hotspot/share/opto/output.cpp +++ b/src/hotspot/share/opto/output.cpp @@ -757,18 +757,28 @@ ObjectValue* sv = sv_for_node_id(objs, spobj->_idx); if (sv == NULL) { ciKlass* cik = t->is_oopptr()->klass(); assert(cik->is_instance_klass() || cik->is_array_klass(), "Not supported allocation."); - sv = new ObjectValue(spobj->_idx, + if (spobj->stack_allocated()) { + Node *box_lock = spobj->in(1); + assert(box_lock != NULL, "Need to have a box lock"); + sv = new StackObjectValue(spobj->_idx, + new ConstantOopWriteValue(cik->java_mirror()->constant_encoding()), + Location::new_stk_loc(Location::oop, C->regalloc()->reg2offset(BoxLockNode::reg(box_lock))), + new ConstantIntValue(spobj->n_fields())); + set_sv_for_object_node(objs, sv); + } else { + sv = new ObjectValue(spobj->_idx, new ConstantOopWriteValue(cik->java_mirror()->constant_encoding())); - set_sv_for_object_node(objs, sv); + set_sv_for_object_node(objs, sv); - uint first_ind = spobj->first_index(sfpt->jvms()); - for (uint i = 0; i < spobj->n_fields(); i++) { - Node* fld_node = sfpt->in(first_ind+i); - (void)FillLocArray(sv->field_values()->length(), sfpt, fld_node, sv->field_values(), objs); + uint first_ind = spobj->first_index(sfpt->jvms()); + for (uint i = 0; i < spobj->n_fields(); i++) { + Node* fld_node = sfpt->in(first_ind+i); + (void)FillLocArray(sv->field_values()->length(), sfpt, fld_node, sv->field_values(), objs); + } } } array->append(sv); return; } @@ -1008,10 +1018,11 @@ // Loop over monitors and insert into array for (idx = 0; idx < num_mon; idx++) { // Grab the node that defines this monitor Node* box_node = sfn->monitor_box(jvms, idx); Node* obj_node = sfn->monitor_obj(jvms, idx); + bool eliminated = (box_node->is_BoxLock() && box_node->as_BoxLock()->is_eliminated()); // Create ScopeValue for object ScopeValue 
*scval = NULL; if (obj_node->is_SafePointScalarObject()) { @@ -1020,18 +1031,30 @@ if (scval == NULL) { const Type *t = spobj->bottom_type(); ciKlass* cik = t->is_oopptr()->klass(); assert(cik->is_instance_klass() || cik->is_array_klass(), "Not supported allocation."); - ObjectValue* sv = new ObjectValue(spobj->_idx, - new ConstantOopWriteValue(cik->java_mirror()->constant_encoding())); - PhaseOutput::set_sv_for_object_node(objs, sv); - - uint first_ind = spobj->first_index(youngest_jvms); - for (uint i = 0; i < spobj->n_fields(); i++) { - Node* fld_node = sfn->in(first_ind+i); - (void)FillLocArray(sv->field_values()->length(), sfn, fld_node, sv->field_values(), objs); + ObjectValue* sv = NULL; + if (spobj->stack_allocated()) { + Node *box_lock = spobj->in(1); + assert(box_lock != NULL, "Need to have a box lock"); + assert(eliminated, "monitor has to be eliminated for stack allocation"); + sv = new StackObjectValue(spobj->_idx, + new ConstantOopWriteValue(cik->java_mirror()->constant_encoding()), + Location::new_stk_loc(Location::oop, C->regalloc()->reg2offset(BoxLockNode::reg(box_lock))), + new ConstantIntValue(spobj->n_fields())); + set_sv_for_object_node(objs, sv); + } else { + sv = new ObjectValue(spobj->_idx, + new ConstantOopWriteValue(cik->java_mirror()->constant_encoding())); + set_sv_for_object_node(objs, sv); + + uint first_ind = spobj->first_index(youngest_jvms); + for (uint i = 0; i < spobj->n_fields(); i++) { + Node* fld_node = sfn->in(first_ind+i); + (void)FillLocArray(sv->field_values()->length(), sfn, fld_node, sv->field_values(), objs); + } } scval = sv; } } else if (!obj_node->is_Con()) { OptoReg::Name obj_reg = C->regalloc()->get_reg_first(obj_node); @@ -1045,14 +1068,34 @@ scval = new ConstantOopWriteValue(tp->is_oopptr()->const_oop()->constant_encoding()); } OptoReg::Name box_reg = BoxLockNode::reg(box_node); Location basic_lock = Location::new_stk_loc(Location::normal,C->regalloc()->reg2offset(box_reg)); - bool eliminated = (box_node->is_BoxLock() && box_node->as_BoxLock()->is_eliminated()); monarray->append(new MonitorValue(scval, basic_lock, eliminated)); } + for (idx = 0; idx < jvms->scl_size(); idx++ ) { + Node* obj_node = sfn->scalar(jvms, idx); + + if (obj_node->is_SafePointScalarObject()) { + SafePointScalarObjectNode* spobj = obj_node->as_SafePointScalarObject(); + if (sv_for_node_id(objs, spobj->_idx) == NULL) { + const Type *t = spobj->bottom_type(); + ciKlass* cik = t->is_oopptr()->klass(); + assert(cik->is_instance_klass() || + cik->is_array_klass(), "Not supported allocation."); + assert(spobj->stack_allocated(), "has to be stack allocated"); + Node *box_lock = spobj->in(1); + assert(box_lock != NULL, "Need to have a box lock"); + StackObjectValue* sv = new StackObjectValue(spobj->_idx, + new ConstantOopWriteValue(cik->java_mirror()->constant_encoding()), + Location::new_stk_loc(Location::oop, C->regalloc()->reg2offset(BoxLockNode::reg(box_lock))), + new ConstantIntValue(spobj->n_fields())); + set_sv_for_object_node(objs, sv); + } + } + } // We dump the object pool first, since deoptimization reads it in first. 
C->debug_info()->dump_object_pool(objs); // Build first class objects to pass to scope DebugToken *locvals = C->debug_info()->create_scope_values(locarray); @@ -1272,10 +1315,17 @@ // fill in the nop array for bundling computations MachNode *_nop_list[Bundle::_nop_count]; Bundle::initialize_nops(_nop_list); + // if we are using stack allocation enable the runtime part + // stack allocation can be enabled selectively via compiler directive + // so we need to enable the runtime part + if (!UseStackAllocationRuntime && C->do_stack_allocation()) { + FLAG_SET_ERGO(UseStackAllocationRuntime, true); + } + return cb; } //------------------------------fill_buffer------------------------------------ void PhaseOutput::fill_buffer(CodeBuffer* cb, uint* blk_starts) { diff a/src/hotspot/share/runtime/deoptimization.cpp b/src/hotspot/share/runtime/deoptimization.cpp --- a/src/hotspot/share/runtime/deoptimization.cpp +++ b/src/hotspot/share/runtime/deoptimization.cpp @@ -301,11 +301,11 @@ bool jvmci_enabled = false; #endif // Reallocate the non-escaping objects and restore their fields. Then // relock objects if synchronization on them was eliminated. - if (jvmci_enabled COMPILER2_PRESENT( || (DoEscapeAnalysis && EliminateAllocations) )) { + if (jvmci_enabled COMPILER2_PRESENT( || (DoEscapeAnalysis && EliminateAllocations || (DoEscapeAnalysis && UseStackAllocationRuntime)) )) { realloc_failures = eliminate_allocations(thread, exec_mode, cm, deoptee, map, chunk); } #endif // COMPILER2_OR_JVMCI // Revoke biases, done with in java state. @@ -1004,16 +1004,27 @@ if (obj == NULL) { obj = ik->allocate_instance(THREAD); } } else if (k->is_typeArray_klass()) { TypeArrayKlass* ak = TypeArrayKlass::cast(k); - assert(sv->field_size() % type2size[ak->element_type()] == 0, "non-integral array length"); - int len = sv->field_size() / type2size[ak->element_type()]; + int len; + if (sv->is_stack_object()) { + len = ((StackObjectValue *)sv)->get_field_length()->value(); + } else { + assert(sv->field_size() % type2size[ak->element_type()] == 0, "non-integral array length"); + len = sv->field_size() / type2size[ak->element_type()]; + } obj = ak->allocate(len, THREAD); } else if (k->is_objArray_klass()) { ObjArrayKlass* ak = ObjArrayKlass::cast(k); - obj = ak->allocate(sv->field_size(), THREAD); + int len; + if (sv->is_stack_object()) { + len = ((StackObjectValue *)sv)->get_field_length()->value(); + } else { + len = sv->field_size(); + } + obj = ak->allocate(len, THREAD); } if (obj == NULL) { failures = true; } @@ -1031,10 +1042,22 @@ } return failures; } +void Deoptimization::reassign_scalar_replaced_fields(frame *fr, RegisterMap *reg_map, GrowableArray* objects, ObjectValue *sv, Handle obj, Klass* k, bool skip_internal) { + if (k->is_instance_klass()) { + InstanceKlass* ik = InstanceKlass::cast(k); + reassign_scalar_replaced_fields_by_klass(ik, fr, reg_map, objects, sv, 0, obj(), skip_internal); + } else if (k->is_typeArray_klass()) { + TypeArrayKlass* ak = TypeArrayKlass::cast(k); + reassign_scalar_replaced_type_array_elements(fr, reg_map, sv, (typeArrayOop) obj(), ak->element_type()); + } else if (k->is_objArray_klass()) { + reassign_scalar_replaced_object_array_elements(fr, reg_map, objects, sv, (objArrayOop) obj()); + } +} + #if INCLUDE_JVMCI /** * For primitive types whose kind gets "erased" at runtime (shorts become stack ints), * we need to somehow be able to recover the actual kind to be able to write the correct * amount of bytes. 
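One detail worth calling out in the reallocation changes above: a scalar-replaced array records one ScopeValue per element, so its length is derived from field_size() and the element size, while a stack-allocated array records only its length via StackObjectValue::get_field_length(), because its contents still live in the frame. A sketch of that branch with stand-in types (SketchObjectValue is an assumption for illustration, not the real ScopeValue hierarchy):

struct SketchObjectValue {
  bool is_stack_object;    // StackObjectValue vs. plain ObjectValue
  int  field_count;        // scalar-replaced: number of recorded field values
  int  recorded_length;    // stack-allocated: explicit array length
};

// Mirrors the length selection added in Deoptimization::realloc_objects above.
static int realloc_array_length(const SketchObjectValue& sv, int slots_per_element) {
  return sv.is_stack_object ? sv.recorded_length
                            : sv.field_count / slots_per_element;
}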
@@ -1092,11 +1115,11 @@ } #endif // INCLUDE_JVMCI // restore elements of an eliminated type array -void Deoptimization::reassign_type_array_elements(frame* fr, RegisterMap* reg_map, ObjectValue* sv, typeArrayOop obj, BasicType type) { +void Deoptimization::reassign_scalar_replaced_type_array_elements(frame* fr, RegisterMap* reg_map, ObjectValue* sv, typeArrayOop obj, BasicType type) { int index = 0; intptr_t val; for (int i = 0; i < sv->field_size(); i++) { StackValue* value = StackValue::create_stack_value(fr, reg_map, sv->field_at(i)); @@ -1191,14 +1214,15 @@ index++; } } // restore fields of an eliminated object array -void Deoptimization::reassign_object_array_elements(frame* fr, RegisterMap* reg_map, ObjectValue* sv, objArrayOop obj) { +void Deoptimization::reassign_scalar_replaced_object_array_elements(frame* fr, RegisterMap* reg_map, GrowableArray* objects, ObjectValue* sv, objArrayOop obj) { for (int i = 0; i < sv->field_size(); i++) { - StackValue* value = StackValue::create_stack_value(fr, reg_map, sv->field_at(i)); + StackValue* value = StackValue::create_stack_value(fr, reg_map, get_scope_value(fr, reg_map, sv->field_at(i), objects)); assert(value->type() == T_OBJECT, "object element expected"); + assert(oopDesc::is_oop_or_null(value->get_obj()()), "must be oop"); obj->obj_at_put(i, value->get_obj()()); } } class ReassignedField { @@ -1214,13 +1238,78 @@ int compare(ReassignedField* left, ReassignedField* right) { return left->_offset - right->_offset; } +ScopeValue *Deoptimization::match_object_to_stack_oop(intptr_t *oop_ptr, intptr_t *sp_base, GrowableArray* objects) { + for (int j = 0; j < objects->length(); j++) { + ScopeValue* o_sv = objects->at(j); + if (o_sv->is_object()) { + if (o_sv->as_ObjectValue()->is_stack_object()) { + StackObjectValue *sov = (StackObjectValue *)o_sv; + Location o_loc = sov->get_stack_location(); + int o_offset = o_loc.stack_offset(); + int l_offset = (address)oop_ptr - (address)sp_base; + if (o_offset == l_offset) { + return o_sv; + } + } + } + } + return NULL; +} + +ScopeValue *Deoptimization::get_scope_value(frame* fr, RegisterMap* reg_map, ScopeValue* sv, GrowableArray* objects) { + if (sv->is_location()) { + if ((objects != NULL) && (objects->length() > 0)) { + LocationValue* lv = (LocationValue *)sv; + Location loc = lv->location(); + intptr_t *oop_ptr; + intptr_t *sp_base = fr->unextended_sp(); + intptr_t *sp_top = sp_base + fr->cb()->frame_size(); + if (loc.is_stack() && (loc.type() == Location::oop)) { + address value_addr = ((address)sp_base) + loc.stack_offset(); + oop val = *(oop *)value_addr; + oop_ptr = cast_from_oop(val); + } else if (loc.is_register() && (loc.type() == Location::oop)) { + address value_addr = reg_map->location(VMRegImpl::as_VMReg(loc.register_number())); + oop val = *(oop *)value_addr; + oop_ptr = cast_from_oop(val); + } else { + assert(loc.type() != Location::oop, "Can not be an oop"); + return sv; + } + if (sp_base <= oop_ptr && oop_ptr < sp_top) { + ScopeValue* o_sv = Deoptimization::match_object_to_stack_oop(oop_ptr, sp_base, objects); + if (o_sv != NULL) { + sv = o_sv; + } else { + assert(false, "pointer to stack but did not find object to replace"); + } + } + } + } else if (sv->is_object()) { + oop o = sv->as_ObjectValue()->value()(); + intptr_t *sp_base = fr->unextended_sp(); + intptr_t *sp_top = sp_base + fr->cb()->frame_size(); + intptr_t *oop_ptr = cast_from_oop(o); + if (sp_base <= oop_ptr && oop_ptr < sp_top) { + ScopeValue* o_sv = Deoptimization::match_object_to_stack_oop(oop_ptr, sp_base, objects); + 
if (o_sv != NULL) { + sv = o_sv; + assert(sv = o_sv, "objects have to match?"); + } else { + assert(false, "pointer to stack but did not find object to replace"); + } + } + } + return sv; +} + // Restore fields of an eliminated instance object using the same field order // returned by HotSpotResolvedObjectTypeImpl.getInstanceFields(true) -static int reassign_fields_by_klass(InstanceKlass* klass, frame* fr, RegisterMap* reg_map, ObjectValue* sv, int svIndex, oop obj, bool skip_internal) { +void Deoptimization::reassign_scalar_replaced_fields_by_klass(InstanceKlass* klass, frame* fr, RegisterMap* reg_map, GrowableArray* objects, ObjectValue* sv, int svIndex, oop obj, bool skip_internal) { GrowableArray* fields = new GrowableArray(); InstanceKlass* ik = klass; while (ik != NULL) { for (AllFieldStream fs(ik); !fs.done(); fs.next()) { if (!fs.access_flags().is_static() && (!skip_internal || !fs.access_flags().is_internal())) { @@ -1233,17 +1322,18 @@ ik = ik->superklass(); } fields->sort(compare); for (int i = 0; i < fields->length(); i++) { intptr_t val; - ScopeValue* scope_field = sv->field_at(svIndex); + ScopeValue* scope_field = get_scope_value(fr, reg_map, sv->field_at(svIndex), objects); StackValue* value = StackValue::create_stack_value(fr, reg_map, scope_field); int offset = fields->at(i)._offset; BasicType type = fields->at(i)._type; switch (type) { case T_OBJECT: case T_ARRAY: assert(value->type() == T_OBJECT, "Agreement."); + assert(oopDesc::is_oop_or_null(value->get_obj()()), "must be oop"); obj->obj_field_put(offset, value->get_obj()()); break; // Have to cast to INT (32 bits) pointer to avoid little/big-endian problem. case T_INT: case T_FLOAT: { // 4 bytes. @@ -1315,11 +1405,170 @@ default: ShouldNotReachHere(); } svIndex++; } - return svIndex; +} + +void Deoptimization::reassign_stack_allocated_type_array_elements(oop orig, oop newly_allocated, Klass *k) { + typeArrayOop orig_obj = (typeArrayOop) orig; + typeArrayOop new_obj = (typeArrayOop) newly_allocated; + assert(orig_obj->length() == new_obj->length(), "lengths have to be the same"); + TypeArrayKlass* ak = TypeArrayKlass::cast(k); + BasicType type = ak->element_type(); + for (int i = 0; i < orig_obj->length(); i++) { + switch (type) { + case T_BOOLEAN: + new_obj->bool_at_put(i, orig_obj->bool_at(i)); + break; + case T_CHAR: + new_obj->char_at_put(i, orig_obj->char_at(i)); + break; + case T_FLOAT: + new_obj->float_at_put(i, orig_obj->float_at(i)); + break; + case T_DOUBLE: + new_obj->double_at_put(i, orig_obj->double_at(i)); + break; + case T_BYTE: + new_obj->byte_at_put(i, orig_obj->byte_at(i)); + break; + case T_SHORT: + new_obj->short_at_put(i, orig_obj->short_at(i)); + break; + case T_INT: + new_obj->int_at_put(i, orig_obj->int_at(i)); + break; + case T_LONG: + new_obj->long_at_put(i, orig_obj->long_at(i)); + break; + default: + assert(false, "unreachable"); + } + } +} + +void Deoptimization::reassign_stack_allocated_object_array_elements(oop orig, oop newly_allocated, intptr_t *sp_base, intptr_t *sp_top, GrowableArray* objects) { + objArrayOop orig_obj = (objArrayOop) orig; + objArrayOop new_obj = (objArrayOop) newly_allocated; + assert(orig_obj->length() == new_obj->length(), "lengths have to be the same"); + for (int i = 0; i < orig_obj->length(); i++) { + oop o = orig_obj->obj_at(i); + intptr_t *oop_ptr = cast_from_oop(o); + if (sp_base <= oop_ptr && oop_ptr < sp_top) { + int field_offset = (address)oop_ptr - (address)sp_base; + bool found = false; + for (int j = 0; j < objects->length(); j++) { + ScopeValue* o_sv = 
+        ScopeValue* o_sv = objects->at(j);
+        if (o_sv->is_object() && o_sv->as_ObjectValue()->is_stack_object()) {
+          StackObjectValue *sov = (StackObjectValue *)o_sv;
+          Location o_loc = sov->get_stack_location();
+          int o_offset = o_loc.stack_offset();
+          if (o_offset == field_offset) {
+            o = sov->value()();
+            found = true;
+            break;
+          }
+        }
+      }
+      assert(found, "pointer to stack but did not find object to replace");
+    }
+    assert(oopDesc::is_oop_or_null(o), "must be oop");
+    new_obj->obj_at_put(i, o);
+  }
+}
+
+class ReassignStackObjectFields: public FieldClosure {
+ private:
+  oop _orig;
+  oop _new;
+  intptr_t *_sp_base;
+  intptr_t *_sp_top;
+  GrowableArray<ScopeValue*>* _objects;
+
+ public:
+  ReassignStackObjectFields(oop orig, oop n, intptr_t *sp_base, intptr_t *sp_top, GrowableArray<ScopeValue*>* objects) :
+    _orig(orig), _new(n), _sp_base(sp_base), _sp_top(sp_top), _objects(objects) {}
+
+  void do_field(fieldDescriptor* fd) {
+    BasicType ft = fd->field_type();
+    switch (ft) {
+      case T_BYTE:
+        _new->byte_field_put(fd->offset(), _orig->byte_field(fd->offset()));
+        break;
+      case T_CHAR:
+        _new->char_field_put(fd->offset(), _orig->char_field(fd->offset()));
+        break;
+      case T_DOUBLE:
+        _new->double_field_put(fd->offset(), _orig->double_field(fd->offset()));
+        break;
+      case T_FLOAT:
+        _new->float_field_put(fd->offset(), _orig->float_field(fd->offset()));
+        break;
+      case T_INT:
+        _new->int_field_put(fd->offset(), _orig->int_field(fd->offset()));
+        break;
+      case T_LONG:
+        _new->long_field_put(fd->offset(), _orig->long_field(fd->offset()));
+        break;
+      case T_SHORT:
+        _new->short_field_put(fd->offset(), _orig->short_field(fd->offset()));
+        break;
+      case T_BOOLEAN:
+        _new->bool_field_put(fd->offset(), _orig->bool_field(fd->offset()));
+        break;
+      case T_ARRAY:
+      case T_OBJECT: {
+        oop o = _orig->obj_field(fd->offset());
+        intptr_t *oop_ptr = cast_from_oop<intptr_t*>(o);
+        if (_sp_base <= oop_ptr && oop_ptr < _sp_top) {
+          int field_offset = (address)oop_ptr - (address)_sp_base;
+          bool found = false;
+          for (int j = 0; j < _objects->length(); j++) {
+            ScopeValue* o_sv = _objects->at(j);
+            if (o_sv->is_object() && o_sv->as_ObjectValue()->is_stack_object()) {
+              StackObjectValue *sov = (StackObjectValue *)o_sv;
+              Location o_loc = sov->get_stack_location();
+              int o_offset = o_loc.stack_offset();
+              if (o_offset == field_offset) {
+                o = sov->value()();
+                found = true;
+                break;
+              }
+            }
+          }
+          assert(found, "Pointer to stack but did not find object to replace");
+        }
+        assert(oopDesc::is_oop_or_null(o), "must be oop");
+        _new->obj_field_put(fd->offset(), o);
+        break;
+      }
+      default:
+        ShouldNotReachHere();
+        break;
+    }
+  }
+};
+
+void Deoptimization::reassign_stack_allocated_fields(frame *fr, GrowableArray<ScopeValue*>* objects, ObjectValue *sv, Handle obj, Klass* k) {
+  StackObjectValue *sov = (StackObjectValue *)sv;
+  Location loc = sov->get_stack_location();
+  address value_addr = ((address)fr->unextended_sp()) + loc.stack_offset();
+  oop orig = cast_to_oop(value_addr);
+  oop newly_allocated = obj();
+  intptr_t *sp_base = fr->unextended_sp();
+  intptr_t *sp_top = sp_base + fr->cb()->frame_size();
+
+  if (k->is_instance_klass()) {
+    InstanceKlass* ik = InstanceKlass::cast(k);
+    ReassignStackObjectFields reassign(orig, newly_allocated, sp_base, sp_top, objects);
+    ik->do_nonstatic_fields(&reassign);
+  } else if (k->is_typeArray_klass()) {
+    reassign_stack_allocated_type_array_elements(orig, newly_allocated, k);
+  } else if (k->is_objArray_klass()) {
+    reassign_stack_allocated_object_array_elements(orig, newly_allocated, sp_base, sp_top, objects);
+  }
+}

 // restore fields of all eliminated objects and arrays
 void Deoptimization::reassign_fields(frame* fr, RegisterMap* reg_map, GrowableArray<ScopeValue*>* objects, bool realloc_failures, bool skip_internal) {
   for (int i = 0; i < objects->length(); i++) {
@@ -1337,18 +1586,15 @@
     // Don't reassign fields of boxes that came from a cache. Caches may be in CDS.
     if (sv->is_auto_box() && ((AutoBoxObjectValue*) sv)->is_cached()) {
       continue;
     }
 #endif // INCLUDE_JVMCI || INCLUDE_AOT
-    if (k->is_instance_klass()) {
-      InstanceKlass* ik = InstanceKlass::cast(k);
-      reassign_fields_by_klass(ik, fr, reg_map, sv, 0, obj(), skip_internal);
-    } else if (k->is_typeArray_klass()) {
-      TypeArrayKlass* ak = TypeArrayKlass::cast(k);
-      reassign_type_array_elements(fr, reg_map, sv, (typeArrayOop) obj(), ak->element_type());
-    } else if (k->is_objArray_klass()) {
-      reassign_object_array_elements(fr, reg_map, sv, (objArrayOop) obj());
+
+    if (sv->is_stack_object()) {
+      reassign_stack_allocated_fields(fr, objects, sv, obj, k);
+    } else {
+      reassign_scalar_replaced_fields(fr, reg_map, objects, sv, obj, k, skip_internal);
     }
   }
 }
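Note: reassign_stack_allocated_object_array_elements and ReassignStackObjectFields::do_field above repeat the same matching loop: take an oop that points into the deoptee frame, compute its byte offset from the unextended SP, and look up the StackObjectValue recorded at that stack offset. A factored helper along the following lines is one way to express that shared invariant; it is an illustrative sketch only, and the name map_stack_oop_to_heap_copy is not part of the patch.

// Illustrative sketch only (not part of the patch): the offset-matching loop
// shared by the object-array and instance-field reassignment paths above.
static oop map_stack_oop_to_heap_copy(oop o, intptr_t* sp_base,
                                      GrowableArray<ScopeValue*>* objects) {
  // Byte offset of the stack-allocated object from the frame's unextended SP.
  int field_offset = (address)cast_from_oop<intptr_t*>(o) - (address)sp_base;
  for (int j = 0; j < objects->length(); j++) {
    ScopeValue* o_sv = objects->at(j);
    if (o_sv->is_object() && o_sv->as_ObjectValue()->is_stack_object()) {
      StackObjectValue* sov = (StackObjectValue*)o_sv;
      if (sov->get_stack_location().stack_offset() == field_offset) {
        return sov->value()();  // reallocated heap copy of the stack object
      }
    }
  }
  assert(false, "pointer to stack but did not find object to replace");
  return o;
}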
diff a/src/hotspot/share/runtime/deoptimization.hpp b/src/hotspot/share/runtime/deoptimization.hpp
--- a/src/hotspot/share/runtime/deoptimization.hpp
+++ b/src/hotspot/share/runtime/deoptimization.hpp
@@ -168,12 +168,19 @@
 #if COMPILER2_OR_JVMCI
 public:

   // Support for restoring non-escaping objects
   static bool realloc_objects(JavaThread* thread, frame* fr, RegisterMap* reg_map, GrowableArray<ScopeValue*>* objects, TRAPS);
-  static void reassign_type_array_elements(frame* fr, RegisterMap* reg_map, ObjectValue* sv, typeArrayOop obj, BasicType type);
-  static void reassign_object_array_elements(frame* fr, RegisterMap* reg_map, ObjectValue* sv, objArrayOop obj);
+  static void reassign_stack_allocated_fields(frame *fr, GrowableArray<ScopeValue*>* objects, ObjectValue *sv, Handle obj, Klass* k);
+  static void reassign_stack_allocated_type_array_elements(oop orig, oop newly_allocated, Klass *k);
+  static void reassign_stack_allocated_object_array_elements(oop orig, oop newly_allocated, intptr_t *sp_base, intptr_t *sp_top, GrowableArray<ScopeValue*>* objects);
+  static void reassign_scalar_replaced_fields(frame *fr, RegisterMap *reg_map, GrowableArray<ScopeValue*>* objects, ObjectValue *sv, Handle obj, Klass* k, bool skip_internal);
+  static void reassign_scalar_replaced_type_array_elements(frame* fr, RegisterMap* reg_map, ObjectValue* sv, typeArrayOop obj, BasicType type);
+  static void reassign_scalar_replaced_object_array_elements(frame* fr, RegisterMap* reg_map, GrowableArray<ScopeValue*>* objects, ObjectValue* sv, objArrayOop obj);
+  static ScopeValue *get_scope_value(frame* fr, RegisterMap* reg_map, ScopeValue* sv, GrowableArray<ScopeValue*>* objects);
+  static ScopeValue *match_object_to_stack_oop(intptr_t *oop_ptr, intptr_t *sp_base, GrowableArray<ScopeValue*>* objects);
+  static void reassign_scalar_replaced_fields_by_klass(InstanceKlass* klass, frame* fr, RegisterMap* reg_map, GrowableArray<ScopeValue*>* objects,
+                                                       ObjectValue* sv, int svIndex, oop obj, bool skip_internal);
   static void reassign_fields(frame* fr, RegisterMap* reg_map, GrowableArray<ScopeValue*>* objects, bool realloc_failures, bool skip_internal);
   static void relock_objects(GrowableArray<MonitorInfo*>* monitors, JavaThread* thread, bool realloc_failures);
   static void pop_frames_failed_reallocs(JavaThread* thread, vframeArray* array);
   NOT_PRODUCT(static void print_objects(GrowableArray<ScopeValue*>* objects, bool realloc_failures);)
 #endif // COMPILER2_OR_JVMCI
diff a/src/hotspot/share/runtime/vframe_hp.cpp b/src/hotspot/share/runtime/vframe_hp.cpp
--- a/src/hotspot/share/runtime/vframe_hp.cpp
+++ b/src/hotspot/share/runtime/vframe_hp.cpp
@@ -56,12 +56,13 @@
   // scv_list is the list of ScopeValues describing the JVM stack state.
   // There is one scv_list entry for every JVM stack state in use.
   int length = scv_list->length();
   StackValueCollection* result = new StackValueCollection(length);
+  GrowableArray<ScopeValue*>* objects = scope()->objects();
   for (int i = 0; i < length; i++) {
-    result->add(create_stack_value(scv_list->at(i)));
+    result->add(create_stack_value(get_scope_value(scv_list, i, objects)));
   }

   // Replace the original values with any stores that have been
   // performed through compiledVFrame::update_locals.
   GrowableArray<jvmtiDeferredLocalVariableSet*>* list = thread()->deferred_locals();
@@ -136,12 +137,13 @@
   // scv_list is the list of ScopeValues describing the JVM stack state.
   // There is one scv_list entry for every JVM stack state in use.
   int length = scv_list->length();
   StackValueCollection* result = new StackValueCollection(length);
+  GrowableArray<ScopeValue*>* objects = scope()->objects();
   for (int i = 0; i < length; i++) {
-    result->add(create_stack_value(scv_list->at(i)));
+    result->add(create_stack_value(get_scope_value(scv_list, i, objects)));
   }

   // Replace the original values with any stores that have been
   // performed through compiledVFrame::update_stack.
   GrowableArray<jvmtiDeferredLocalVariableSet*>* list = thread()->deferred_locals();
@@ -169,10 +171,83 @@

 BasicLock* compiledVFrame::resolve_monitor_lock(Location location) const {
   return StackValue::resolve_monitor_lock(&_fr, location);
 }

+ScopeValue *compiledVFrame::match_object_to_stack_oop(intptr_t *oop_ptr, intptr_t *sp_base, GrowableArray<ScopeValue*>* objects) const {
+  if (objects == NULL) {
+    return NULL;
+  }
+  for (int j = 0; j < objects->length(); j++) {
+    ScopeValue* o_sv = objects->at(j);
+    if (o_sv->is_object()) {
+      if (o_sv->as_ObjectValue()->is_stack_object()) {
+        StackObjectValue *sov = (StackObjectValue *)o_sv;
+        Location o_loc = sov->get_stack_location();
+        int o_offset = o_loc.stack_offset();
+        int l_offset = (address)oop_ptr - (address)sp_base;
+        if (o_offset == l_offset) {
+          return o_sv;
+        }
+      }
+    }
+  }
+  return NULL;
+}
+
+ScopeValue *compiledVFrame::get_scope_value(GrowableArray<ScopeValue*>* scv_list, int index, GrowableArray<ScopeValue*>* objects) const {
+  ScopeValue* sv = scv_list->at(index);
+  if (sv->is_location()) {
+    if ((objects != NULL) && (objects->length() > 0)) {
+      LocationValue* lv = (LocationValue *)sv;
+      Location loc = lv->location();
+      intptr_t *oop_ptr;
+      intptr_t *sp_base = _fr.unextended_sp();
+      intptr_t *sp_top = sp_base + _fr.cb()->frame_size();
+      if (loc.is_stack() && (loc.type() == Location::oop)) {
+        address value_addr = ((address)sp_base) + loc.stack_offset();
+        oop val = *(oop *)value_addr;
+        oop_ptr = cast_from_oop<intptr_t*>(val);
+      } else if (loc.is_register() && (loc.type() == Location::oop)) {
+        address value_addr = register_map()->location(VMRegImpl::as_VMReg(loc.register_number()));
+        oop val = *(oop *)value_addr;
+        oop_ptr = cast_from_oop<intptr_t*>(val);
+      } else {
+        assert(loc.type() != Location::oop, "cannot be an oop");
+        return sv;
+      }
+      if (sp_base <= oop_ptr && oop_ptr < sp_top) {
+        ScopeValue* o_sv = match_object_to_stack_oop(oop_ptr, sp_base, objects);
+        if (o_sv != NULL) {
+          scv_list->at_put(index, o_sv);
+          sv = o_sv;
+        } else {
+          assert(false, "did not find stack oop for object on stack");
+        }
+      }
+    }
+  } else if (sv->is_object()) {
+    oop o = sv->as_ObjectValue()->value()();
+    intptr_t *sp_base = _fr.unextended_sp();
+    intptr_t *sp_top = sp_base + _fr.cb()->frame_size();
+    intptr_t *oop_ptr = cast_from_oop<intptr_t*>(o);
+    if (sp_base <= oop_ptr && oop_ptr < sp_top) {
+      ScopeValue* o_sv = match_object_to_stack_oop(oop_ptr, sp_base, objects);
+      if (o_sv != NULL) {
+        assert(sv == o_sv, "Objects need to match");
+        sv = o_sv;
+      } else {
+        assert(false, "did not find stack oop for object on stack");
+      }
+    }
+    assert(oopDesc::is_oop_or_null(sv->as_ObjectValue()->value()()), "needs to be an oop");
+  }
+  return sv;
+}
+
 GrowableArray<MonitorInfo*>* compiledVFrame::monitors() const {
   // Natives has no scope
   if (scope() == NULL) {
     CompiledMethod* nm = code();
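Both compiledVFrame::get_scope_value above and the deoptimization changes rely on the same containment test to decide whether a value is really a pointer to a stack-allocated object in the current compiled frame. A minimal sketch of that test follows; the helper name is hypothetical and the bounds are assumed to be the frame's unextended SP plus its CodeBlob frame size, as used in the patch.

// Illustrative sketch only: an oop refers to a stack-allocated object of this
// frame when it lies in [unextended_sp, unextended_sp + frame_size); its byte
// offset from unextended_sp is then the Location::stack_offset() recorded for
// the matching StackObjectValue.
static bool points_into_compiled_frame(const frame* fr, oop o) {
  intptr_t* p       = cast_from_oop<intptr_t*>(o);
  intptr_t* sp_base = fr->unextended_sp();
  intptr_t* sp_top  = sp_base + fr->cb()->frame_size();
  return sp_base <= p && p < sp_top;
}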
diff a/src/hotspot/share/runtime/vframe_hp.hpp b/src/hotspot/share/runtime/vframe_hp.hpp
--- a/src/hotspot/share/runtime/vframe_hp.hpp
+++ b/src/hotspot/share/runtime/vframe_hp.hpp
@@ -76,17 +76,21 @@
   compiledVFrame* at_scope(int decode_offset, int vframe_id);

   // Returns SynchronizationEntryBCI or bci() (used for synchronization)
   int raw_bci() const;

+  // Used by stack allocation to match a stack oop to a described stack allocated object
+  ScopeValue *match_object_to_stack_oop(intptr_t *oop_ptr, intptr_t *sp_base, GrowableArray<ScopeValue*>* objects) const;
+
 protected:
   ScopeDesc* _scope;
   int _vframe_id;

   //StackValue resolve(ScopeValue* sv) const;
   BasicLock* resolve_monitor_lock(Location location) const;
   StackValue *create_stack_value(ScopeValue *sv) const;
+  ScopeValue *get_scope_value(GrowableArray<ScopeValue*>* scv_list, int index, GrowableArray<ScopeValue*>* objects) const;

 private:
   compiledVFrame(const frame* fr, const RegisterMap* reg_map, JavaThread* thread, ScopeDesc* scope, int vframe_id);

 #ifndef PRODUCT
diff a/src/java.base/share/classes/java/util/ArrayList.java b/src/java.base/share/classes/java/util/ArrayList.java
--- a/src/java.base/share/classes/java/util/ArrayList.java
+++ b/src/java.base/share/classes/java/util/ArrayList.java
@@ -233,12 +233,15 @@
             int newCapacity = ArraysSupport.newLength(oldCapacity,
                     minCapacity - oldCapacity, /* minimum growth */
                     oldCapacity >> 1           /* preferred growth */);
             return elementData = Arrays.copyOf(elementData, newCapacity);
         } else {
-            return elementData = new Object[Math.max(DEFAULT_CAPACITY, minCapacity)];
-        }
+            if (DEFAULT_CAPACITY > minCapacity) {
+                return elementData = new Object[DEFAULT_CAPACITY];
+            }
+            return elementData = new Object[minCapacity];
+        }
     }

     private Object[] grow() {
         return grow(size + 1);
     }
diff a/src/java.base/share/classes/java/util/regex/Matcher.java b/src/java.base/share/classes/java/util/regex/Matcher.java
--- a/src/java.base/share/classes/java/util/regex/Matcher.java
+++ b/src/java.base/share/classes/java/util/regex/Matcher.java
@@ -242,12 +242,15 @@
     Matcher(Pattern parent, CharSequence text) {
         this.parentPattern = parent;
         this.text = text;

         // Allocate state storage
-        int parentGroupCount = Math.max(parent.capturingGroupCount, 10);
-        groups = new int[parentGroupCount * 2];
+        if (parent.capturingGroupCount > 10) {
+            groups = new int[parent.capturingGroupCount * 2];
+        } else {
+            groups = new int[20];
+        }
         locals = new int[parent.localCount];
         localsPos = new IntHashSet[parent.localTCNCount];

         // Put fields into initial states
         reset();